Example #1
    def _apply_time_adjustments(self, dp):
        """
        Some drivers, 'wult_igb' for example, provide adjustments for 'TAI', 'TBI', and 'TIntr'.
        The adjustments are there to improve measurement accuracy, and they are in CPU cycles.
        This function adjusts 'SilentTime', 'WakeLatency', and 'IntrLatency' accordingly.

        This function also validates the adjusted values. Returns the datapoint in case of success
        and 'None' if the datapoint has to be dropped.
        """

        # Apply the adjustments if the driver provides them.
        if "TBIAdjCyc" in dp:
            tbi_adj = self._cyc_to_ns(dp["TBIAdjCyc"])
            dp["SilentTimeRaw"] = dp["SilentTime"]
            dp["SilentTime"] -= tbi_adj

            if dp["TBI"] + tbi_adj >= dp["LTime"]:
                _LOG.debug(
                    "adjusted 'TBI' is greater than 'LTime', the scheduled event must have "
                    "happened before the CPU entered the idle state. The datapoint is:\n%s\n"
                    "Adjusted 'TBI' is %d + %d = %d ns\nDropping this datapoint\n",
                    Human.dict2str(dp), dp["TBI"], tbi_adj,
                    dp["TBI"] + tbi_adj)
                return None

        if "TAIAdjCyc" in dp:
            tai_adj = self._cyc_to_ns(dp["TAIAdjCyc"])
            tintr_adj = self._cyc_to_ns(dp["TIntrAdjCyc"])

            dp["WakeLatencyRaw"] = dp["WakeLatency"]
            dp["IntrLatencyRaw"] = dp["IntrLatency"]
            dp["WakeLatency"] -= tai_adj
            dp["IntrLatency"] -= tintr_adj

            if dp["TAI"] - tai_adj <= dp["LTime"]:
                _LOG.debug(
                    "adjusted 'TAI' is smaller than 'LTime', the CPU must have woken up "
                    "before 'LTime'. The datapoint is:\n%s\n"
                    "Adjusted 'TAI' is %d - %d = %d ns\nDropping this datapoint\n",
                    Human.dict2str(dp), dp["TAI"], tai_adj,
                    dp["TAI"] - tai_adj)
                return None
            if dp["TIntr"] - tintr_adj <= dp["LTime"]:
                _LOG.debug(
                    "adjusted 'TIntr' is smaller than 'LTime', the CPU must have woken up "
                    "before 'LTime'. The datapoint is:\n%s\n"
                    "Adjusted 'TIntr' is %d - %d = %d ns\nDropping this datapoint\n",
                    Human.dict2str(dp), dp["TIntr"], tintr_adj,
                    dp["TIntr"] - tintr_adj)
                return None

        return dp
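A note on the conversion used above: '_cyc_to_ns()' is not shown in this example. Below is a minimal sketch of the underlying arithmetic, assuming the TSC rate is known in MHz (as calculated in Example #2). The helper is hypothetical and for illustration only.

def cyc_to_ns(cycles, tsc_mhz):
    """Convert a CPU (TSC) cycles count to nanoseconds (illustrative sketch)."""
    # A rate of 1 MHz means 1 cycle per microsecond, so 'cycles / tsc_mhz' is the
    # duration in microseconds; multiply by 1000 to get nanoseconds.
    return int((cycles * 1000) / tsc_mhz)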
Example #2
    def _calculate_tsc_rate(self, rawdp):
        """
        TSC rate is calculated using the 'BICyc' and 'BIMonotinic' raw datapoint fields. These
        fields are read one after the other with interrupts disabled. The former is "TSC cycles
        Before Idle", the latter stands for "Monotonic time Before Idle". The "Before Idle" part
        is not relevant here; it only indicates that these counters were read just before the
        system entered an idle state.

        We need a couple of datapoints far enough apart in order to calculate TSC rate. This method
        is called for every datapoint, and once there are a couple of datapoints
        'self.tsc_cal_time' seconds apart, this function calculates TSC rate and stores it in
        'self.tsc_mhz'.
        """

        if self.tsc_mhz:
            # TSC rate is already known, skip the calculations.
            return

        if rawdp["SMICnt"] != 0 or rawdp["NMICnt"] != 0:
            # Do not use this datapoint, there was an SMI or NMI, and there is a chance that it
            # happened between the 'BICyc' and 'BIMonotinic' reads, which may skew our TSC rate
            # calculations.
            _LOG.debug(
                "NMI/SMI detected, won't use the datapoint for TSC rate calculations:\n%s",
                Human.dict2str(rawdp))
            return

        if not self._tsc1:
            # We are called for the first time.
            self._tsc1 = rawdp["BICyc"]
            self._ts1 = rawdp["BIMonotinic"]
            return

        tsc2 = rawdp["BICyc"]
        ts2 = rawdp["BIMonotinic"]

        # Bear in mind that 'ts' is in nanoseconds.
        if ts2 - self._ts1 < self.tsc_cal_time * 1000000000:
            return

        # Should not really happen because of the check above, but let's be paranoid and avoid
        # dividing by zero.
        if ts2 == self._ts1:
            _LOG.debug(
                "monotonic time did not change, won't use the datapoint for TSC rate "
                "calculations:\n%s", Human.dict2str(rawdp))
            return

        self.tsc_mhz = ((tsc2 - self._tsc1) * 1000.0) / (ts2 - self._ts1)
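The formula above is just "delta cycles over delta time": cycles divided by nanoseconds gives GHz, and multiplying by 1000 converts to MHz. A standalone sketch of the same arithmetic with made-up sample values:

# Made-up samples: (TSC cycles, monotonic ns) taken 10 seconds apart.
tsc1, ts1 = 1_000_000_000, 5_000_000_000
tsc2, ts2 = 21_000_000_000, 15_000_000_000

# (cycles / ns) is GHz; multiplying by 1000.0 yields MHz.
tsc_mhz = ((tsc2 - tsc1) * 1000.0) / (ts2 - ts1)
print(f"TSC rate is {tsc_mhz:.6f} MHz")  # TSC rate is 2000.000000 MHz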
Example #3
def filter_command(args):
    """Implements the 'filter' command for the 'wult' and 'ndl' tools."""

    res = RORawResult.RORawResult(args.respath)

    if args.list_columns:
        for colname in res.colnames:
            _LOG.info("%s: %s", colname, res.defs.info[colname]["title"])
        return

    if not getattr(args, "oargs", None):
        raise Error("please, specify at least one reduction criteria.")
    if args.reportid and not args.outdir:
        raise Error("'--reportid' can be used only with '-o'/'--outdir'")

    if args.human_readable and args.outdir:
        raise Error("'--human-readable' and '--outdir' are mutually exclusive")

    apply_filters(args, res)

    if args.outdir:
        res.save(args.outdir, reportid=args.reportid)
    elif not args.human_readable:
        res.df.to_csv(sys.stdout, index=False, header=True)
    else:
        for idx, (_, dp) in enumerate(res.df.iterrows()):
            if idx > 0:
                _LOG.info("")
            _LOG.info(Human.dict2str(dict(dp)))
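A hedged sketch of how 'filter_command()' might be driven from a test, assuming an argparse-style namespace with the attributes the function reads. The result path and the 'oargs' structure are illustrative assumptions, not the tool's documented format.

from types import SimpleNamespace

# Illustrative only: mimic what the 'wult filter' argument parser might produce.
args = SimpleNamespace(
    respath="./wult-results/example",          # hypothetical raw result directory
    list_columns=False,
    oargs=[("rfilt", "WakeLatency > 10000")],  # assumed filter/selector form
    reportid=None,
    outdir=None,
    human_readable=True,
)
filter_command(args)  # would print the matching datapoints one by one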
Example #4
def _validate_range(rng, what, single_ok):
    """Implements 'parse_ldist()'."""

    if single_ok:
        min_len = 1
    else:
        min_len = 2

    split_rng = Trivial.split_csv_line(rng)

    if len(split_rng) < min_len:
        raise Error(f"bad {what} range '{rng}', it should include at least {min_len} number(s)")
    if len(split_rng) > 2:
        raise Error(f"bad {what} range '{rng}', it should not include more than 2 numbers")

    vals = [None] * len(split_rng)

    for idx, val in enumerate(split_rng):
        vals[idx] = Human.parse_duration_ns(val, default_unit="us", name=what)
        if vals[idx] < 0:
            raise Error(f"bad {what} value '{split_rng[idx]}', it should not be negative")

    if len(vals) == 2 and vals[1] < vals[0]:
        raise Error(f"bad {what} range '{rng}', the first number cannot be greater than the "
                    f"second number")
    if len(vals) == 1:
        vals.append(vals[0])

    return vals
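Illustrative calls, assuming 'Human.parse_duration_ns()' interprets unit-less input using the "us" default unit and returns nanoseconds:

# A two-number range: both values are parsed as microseconds and returned in
# nanoseconds.
_validate_range("100,200", "launch distance", single_ok=False)
# -> [100000, 200000]

# A single number is allowed when 'single_ok' is 'True', and it is duplicated
# to form a range.
_validate_range("50", "launch distance", single_ok=True)
# -> [50000, 50000]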
Example #5
def cmd_failed_msg(command,
                   stdout,
                   stderr,
                   exitcode,
                   hostname=None,
                   startmsg=None,
                   timeout=None):
    """
    This helper function formats an error message for a failed command 'command'. The 'stdout' and
    'stderr' arguments are what the command printed to the standard output and error streams, and
    'exitcode' is the exit status of the failed command. The 'hostname' parameter, if provided, is
    mentioned in the error message to make it clear which host the command ran on. The 'startmsg'
    parameter can be used to specify the start of the error message. The 'timeout' argument
    specifies the command timeout.
    """

    if not isinstance(command, str):
        # Sometimes commands are represented by a list of command components - join it.
        command = " ".join(command)

    if timeout is None:
        timeout = TIMEOUT
    elif timeout == -1:
        timeout = None

    if exitcode is not None:
        exitcode_msg = "failed with exit code %s" % exitcode
    elif timeout is not None:
        exitcode_msg = "did not finish within %s seconds (%s)" \
                       % (timeout, Human.duration(timeout))
    else:
        exitcode_msg = "failed, but no exit code is available, this is a bug!"

    msg = ""
    for stream in (stdout, stderr):
        if not stream:
            continue
        if isinstance(stream, list):
            stream = "".join(stream)
        msg += "%s\n" % stream.strip()

    if not startmsg:
        if hostname:
            startmsg = "ran the following command on host '%s', but it %s" \
                        % (hostname, exitcode_msg)
        else:
            startmsg = "the following command %s:" % exitcode_msg

    result = "%s\n%s" % (startmsg, command)
    if msg:
        result += "\n\n%s" % msg.strip()
    return result
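An illustrative call with made-up command output, showing how the pieces are assembled:

# Illustrative only: format an error message for a failed 'ls' command.
msg = cmd_failed_msg(["ls", "/nonexistent"],
                     stdout="",
                     stderr="ls: cannot access '/nonexistent': No such file or directory",
                     exitcode=2,
                     hostname="testhost")
print(msg)
# ran the following command on host 'testhost', but it failed with exit code 2
# ls /nonexistent
#
# ls: cannot access '/nonexistent': No such file or directory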
Example #6
    def _process_cstates(self, dp):
        """
        Validate various datapoint 'dp' fields related to C-states. Populate the processed
        datapoint 'dp' with fields related to C-states.
        """

        dp["ReqCState"] = self._get_req_cstate_name(dp)

        if dp["TotCyc"] == 0:
            raise Error(
                f"Zero total cycles ('TotCyc'), this should never happen, unless there is "
                f"a bug. The raw datapoint is:\n{Human.dict2str(dp)}"
            ) from None

        # The driver takes TSC and MPERF counters so that the MPERF interval is inside the
        # TSC interval, so delta TSC (total cycles) is expected to be always greater than
        # delta MPERF (C0 cycles).
        if dp["TotCyc"] < dp["CC0Cyc"]:
            raise Error(
                f"total cycles is smaller than CC0 cycles, the raw datapoint is:\n"
                f"{Human.dict2str(dp)}")

        # Add the C-state percentages.
        for field in self._cs_fields:
            cyc_filed = Defs.get_cscyc_colname(Defs.get_csname(field))

            # In case of POLL state, calculate only CC0%.
            if self._is_poll_idle(dp) and cyc_filed != "CC0Cyc":
                dp[field] = 0
                continue

            dp[field] = dp[cyc_filed] / dp["TotCyc"] * 100.0

            if dp[field] > 100:
                # Sometimes C-state residency counters are not precise, especially for short
                # sleep times. Log at debug level by default, and warn only when the percentage
                # is way too large.
                loglevel = logging.DEBUG
                if dp[field] > 300:
                    loglevel = logging.WARNING

                csname = Defs.get_csname(field)
                _LOG.log(
                    loglevel,
                    "too high %s residency of %.1f%%, using 100%% instead. The "
                    "datapoint is:\n%s", csname, dp[field], Human.dict2str(dp))
                dp[field] = 100.0

        if self._has_cstates and not self._is_poll_idle(dp):
            # Populate 'CC1Derived%' - the software-calculated CC1 residency, which is useful to
            # have because not every Intel platform has a hardware CC1 counter. Calculated as total
            # cycles minus cycles in C-states other than CC1.

            non_cc1_cyc = 0
            for field in dp.keys():
                if Defs.is_cscyc_colname(field) and Defs.get_csname(field) != "CC1":
                    non_cc1_cyc += dp[field]

            dp["CC1Derived%"] = (dp["TotCyc"] - non_cc1_cyc) / dp["TotCyc"] * 100.0
            if dp["CC1Derived%"] < 0:
                # The C-state counters are not always precise, and we may end up with a negative
                # number.
                dp["CC1Derived%"] = 0
        else:
            dp["CC1Derived%"] = 0
Example #7
    def _process_time(self, dp):
        """
        Calculate, validate, and initialize fields related to time, for example 'WakeLatency' and
        'IntrLatency'.
        """

        if self._drvname == "wult_igb":
            self._calc_wult_igb_delays(dp)

        dp["SilentTime"] = dp["LTime"] - dp["TBI"]
        if self._intr_focus:
            # We do not measure 'WakeLatency' in this case, but it is handy to have it in the
            # dictionary as '0'. We'll remove it at the end of this function.
            dp["WakeLatency"] = 0
        else:
            dp["WakeLatency"] = dp["TAI"] - dp["LTime"]
        dp["IntrLatency"] = dp["TIntr"] - dp["LTime"]

        if self._drvname == "wult_tdt":
            # In case of 'wult_tdt' driver the time is in TSC cycles, convert to nanoseconds.
            for key in ("SilentTime", "WakeLatency", "IntrLatency"):
                dp[key] = self._cyc_to_ns(dp[key])

        if not self._apply_time_adjustments(dp):
            return None

        self._check_cstate_intrs(dp)

        # Try to compensate for the overhead introduced by wult drivers.
        #
        # Some C-states are entered with interrupts enabled (e.g., POLL), and some C-states are
        # entered with interrupts disabled. This is indicated by the 'IntrOff' flag ('IntrOff ==
        # True' are the datapoints for C-states entered with interrupts disabled).
        if dp["IntrOff"]:
            # 1. When the CPU exits the C-state, it runs 'after_idle()' before the interrupt
            #    handler.
            #    1.1. If 'self._intr_focus == False', 'WakeLatency' is measured in 'after_idle()'.
            #         This introduces additional overhead, and delays the interrupt handler. This
            #         overhead can be estimated using 'AICyc1' and 'AICyc2' TSC counter snapshots.
            #    1.2. If 'self._intr_focus == True', 'WakeLatency' is not measured at all.
            # 2. The interrupt handler is executed shortly after 'after_idle()' finishes and the
            #    "cpuidle" Linux kernel subsystem re-enables CPU interrupts.

            if dp["WakeLatency"] >= dp["IntrLatency"]:
                self._warn(
                    "IntrOff_WakeLatency_GT_IntrLatency",
                    "'WakeLatency' is greater than 'IntrLatency', even though interrupts "
                    "were disabled. The datapoint is:\n%s\nDropping this datapoint\n",
                    Human.dict2str(dp))
                return None

            if self._early_intr:
                self._warn(
                    "IntrOff_early_intr",
                    "hit a datapoint with interrupts disabled even though the early "
                    "interrupts feature was enabled. The datapoint is:\n%s\n"
                    "Dropping this datapoint\n", Human.dict2str(dp))
                return None

            if self._intr_focus:
                overhead = 0
            else:
                overhead = dp["AICyc2"] - dp["AICyc1"]
            overhead = self._cyc_to_ns(overhead)

            if overhead >= dp["IntrLatency"]:
                # This sometimes happens, most probably because the overhead is measured by reading
                # TSC at the beginning and the end of the 'after_idle()' function, which runs as
                # soon as the CPU wakes up. The 'IntrLatency' is measured using a delayed event
                # device (e.g., a NIC). So we are comparing two time intervals from different time
                # sources.
                _LOG.debug(
                    "The overhead is greater than interrupt latency ('IntrLatency'). The "
                    "datapoint is:\n%s\nThe overhead is: %f\nDropping this datapoint\n",
                    Human.dict2str(dp), overhead)
                return None

            if dp["WakeLatency"] >= dp["IntrLatency"] - overhead:
                # This condition may happen for similar reasons.
                _LOG.debug(
                    "'WakeLatency' is greater than 'IntrLatency' - overhead, even though "
                    "interrupts were disabled. The datapoint is:\n%s\nThe overhead is: %f\n"
                    "Dropping this datapoint\n", Human.dict2str(dp), overhead)
                return None

            if "IntrLatencyRaw" not in dp:
                dp["IntrLatencyRaw"] = dp["IntrLatency"]
            dp["IntrLatency"] -= overhead

        if not dp["IntrOff"] and not self._intr_focus:
            # 1. When the CPU exits the C-state, it runs the interrupt handler before
            #    'after_idle()'.
            # 2. The interrupt latency is measured in the interrupt handler. This introduces
            #    additional overhead, and delays 'after_idle()'. This overhead can be estimated
            #    using 'IntrCyc1' and 'IntrCyc2' TSC counter snapshots. But if 'self._intr_focus ==
            #    True', 'WakeLatency' is not going to be measured anyway.
            # 3. 'after_idle()' is executed after the interrupt handler and measures 'WakeLatency',
            #    which is greater than 'IntrLatency' in this case.
            #
            # Bear in mind that wult's interrupt handler may be executed after other interrupt
            # handlers, in case there are multiple pending interrupts. Also keep in mind that in
            # case of timer-based measurements, the generic Linux interrupt handler is executed
            # first, and wult's handler may be called after other registered handlers. In other
            # words, there may be many CPU instructions between the moment the CPU wakes up from
            # the C-state and the moment it executes wult's interrupt handler.

            if self._drvname == "wult_tdt":
                csname = self._get_req_cstate_name(dp)
                self._warn(
                    f"tdt_{csname}_IntrOn",
                    "the %s C-state has interrupts enabled and therefore can't be "
                    "collected with the 'tdt' driver. Use another driver for %s.",
                    csname, csname)
                _LOG.debug(
                    "dropping datapoint with interrupts enabled - the 'tdt' driver does not "
                    "handle them correctly. The datapoint is:\n%s",
                    Human.dict2str(dp))
                return None

            if dp["IntrLatency"] >= dp["WakeLatency"]:
                self._warn(
                    "IntrON_IntrLatency_GT_WakeLatency",
                    "'IntrLatency' is greater than 'WakeLatency', even though interrupts "
                    "were enabled. The datapoint is:\n%s\nDropping this datapoint\n",
                    Human.dict2str(dp))
                return None

            if self._intr_focus:
                overhead = 0
            else:
                overhead = dp["IntrCyc2"] - dp["IntrCyc1"]
            overhead = self._cyc_to_ns(overhead)

            if overhead >= dp["WakeLatency"]:
                _LOG.debug(
                    "overhead is greater than wake latency ('WakeLatency'). The "
                    "datapoint is:\n%s\nThe overhead is: %f\nDropping this datapoint\n",
                    overhead, Human.dict2str(dp))
                return None

            if dp["IntrLatency"] >= dp["WakeLatency"] - overhead:
                _LOG.debug(
                    "'IntrLatency' is greater than 'WakeLatency' - overhead, even though "
                    "interrupts were enabled. The datapoint is:\n%s\nThe overhead is: %f\n"
                    "Dropping this datapoint\n", Human.dict2str(dp), overhead)
                return None

            if "WakeLatencyRaw" not in dp:
                dp["WakeLatencyRaw"] = dp["WakeLatency"]
            dp["WakeLatency"] -= overhead

        if self._intr_focus:
            # In case of interrupt-focused measurements we do not really measure 'WakeLatency', but
            # we add it to 'dp' in order to have less 'if' statements in the code. But now it is
            # time to delete it from 'dp'.
            if "WakeLatencyRaw" in dp:
                del dp["WakeLatencyRaw"]
            del dp["WakeLatency"]

        return dp
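The overhead compensation above boils down to converting a TSC-measured interval to nanoseconds and subtracting it from the latency. A standalone sketch for the interrupts-disabled ('IntrOff') case, with made-up numbers:

# Made-up values for the 'IntrOff' case.
tsc_mhz = 2000.0                   # TSC rate (see Example #2)
ai_cyc1, ai_cyc2 = 10_000, 30_000  # TSC snapshots taken around the 'WakeLatency'
                                   # measurement in 'after_idle()'

overhead_ns = (ai_cyc2 - ai_cyc1) * 1000 / tsc_mhz  # 10000.0 ns at 2 GHz

intr_latency_raw = 60_000                      # measured 'IntrLatency', ns
intr_latency = intr_latency_raw - overhead_ns  # 50000.0 ns after compensation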
Example #8
    def _collect(self, dpcnt, tlimit, keep_rawdp):
        """
        Collect datapoints and stop when either the CSV file has 'dpcnt' datapoints in total or
        when the collection time exceeds 'tlimit' (value '0' or 'None' means "no limit"). Returns
        the count of collected datapoints. If filters were configured, the returned value counts
        only those datapoints that passed the filters.
        """

        datapoints = self._get_datapoints()
        rawdp = next(datapoints)

        _LOG.info("Calculating TSC rate for %s",
                  Human.duration(self._dpp.tsc_cal_time))

        # We could actually process this datapoint, but we prefer to drop it and start with the
        # second one.
        self._dpp.prepare(rawdp, keep_rawdp)

        latkey = "IntrLatency" if self._intr_focus else "WakeLatency"

        # At least one datapoint is expected to be collected every 'timeout' seconds.
        timeout = self._timeout * 1.5
        start_time = last_rawdp_time = time.time()
        collected_cnt = 0
        tsc_rate_printed = False

        for rawdp in datapoints:
            if time.time() - last_rawdp_time > timeout:
                raise ErrorTimeOut(
                    f"no datapoints accepted for {timeout} seconds. While the "
                    f"driver does produce them, they are being rejected. One "
                    f"possible reason is that they do not pass filters/selectors."
                )

            self._dpp.add_raw_datapoint(rawdp)

            if not self._dpp.tsc_mhz:
                # TSC rate has not been calculated yet.
                continue
            if not tsc_rate_printed:
                _LOG.info("TSC rate is %.6f MHz", self._dpp.tsc_mhz)
                tsc_rate_printed = True

            for dp in self._dpp.get_processed_datapoints():
                if not self._res.csv.hdr:
                    # Add the CSV header, based on the keys of the first datapoint.
                    self._res.csv.add_header(dp.keys())

                # Add the data to the CSV file.
                if not self._res.add_csv_row(dp):
                    # The data point has not been added (e.g., because it did not pass row filters).
                    continue

                self._max_latency = max(dp[latkey], self._max_latency)
                self._progress.update(collected_cnt, self._max_latency)
                last_rawdp_time = time.time()

                collected_cnt += 1
                if collected_cnt >= dpcnt:
                    break

            if (tlimit and time.time() - start_time > tlimit) or collected_cnt >= dpcnt:
                break

        return collected_cnt
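The loop's exit conditions can be factored out for readability. A minimal sketch of an equivalent helper (hypothetical, not part of the original code):

import time

def _should_stop(start_time, tlimit, collected_cnt, dpcnt):
    """Return 'True' when the time limit or the datapoint count is reached."""
    if tlimit and time.time() - start_time > tlimit:
        return True
    return collected_cnt >= dpcnt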