Beispiel #1
0
def get_coverage_stats(coverage_map):
    """
    Sample the upcall_flow_limit_hit coverage counter of the vswitch.

    The total reported by "ovs-appctl coverage/show" for that counter is
    stored in the current sampling slot of the coverage object. When
    coverage_map already holds a "coverage" entry, its slot index is
    advanced first (wrapping at config.ncd_samples_max); otherwise a new
    Dataif_Coverage object is created and registered in
    Context.coverage_map.

    Parameters
    ----------
    coverage_map : dict
        mapping of coverage class and its coverage object.

    Returns
    -------
    dict
        the coverage_map that was passed in.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    """
    # not used below; the attribute read is kept as in the original.
    nlog = Context.nlog

    # retrieve required data from the vswitch.
    output = util.exec_host_command("ovs-appctl coverage/show")
    if not output:
        raise OsCommandExc("unable to collect data")

    for row in output.splitlines():
        # only the upcall_flow_limit_hit counter is of interest.
        if not row.startswith("upcall_flow_limit_hit"):
            continue

        # fields are separated by two or more blanks and the last one
        # has the form "<label>: <count>".
        last_field = re.split(r'\s{2,}', row)[-1]
        _label, raw_count = last_field.split(": ")
        hits = int(raw_count)

        if "coverage" not in coverage_map:
            # first sample: create the coverage object.
            sampler = Dataif_Coverage()
            Context.coverage_map["coverage"] = sampler
        else:
            # known object: move on to its next sampling slot.
            sampler = coverage_map["coverage"]
            sampler.index = (sampler.index + 1) % config.ncd_samples_max

        sampler.upcall[sampler.index] = hits

    return coverage_map
Beispiel #2
0
def ncd_kill(signal, frame):
    """
    Signal handler: undo rebalance settings, save state and stop ncd.

    Removes the pmd-rxq-affinity setting from every port flagged as
    rebalanced, writes the current configuration and events as json
    into config.ncd_dump_file and finally raises NcdShutdownExc.

    Parameters
    ----------
    signal : int
        number of the signal received.
    frame : frame object
        stack frame interrupted by the signal (unused).

    Raises
    ------
    NcdShutdownExc
        always, once the clean up is done.
    """
    ctx = dataif.Context
    rctx = RebalContext
    tctx = TraceContext

    nlog.critical("Got signal %s, doing required clean up .." % signal)

    # reset rebalance settings in ports, skipping every port that we
    # did not rebalance.
    cmd = "".join(
        "-- remove Interface %s other_config pmd-rxq-affinity " % port_name
        for port_name, port in ctx.port_to_cls.items() if port.rebalance)

    if cmd:
        ret = util.exec_host_command("ovs-vsctl --no-wait %s" % cmd)
        # exec_host_command is used elsewhere as returning the command
        # output (text) on success and 1 on failure, so comparing with 0
        # alone would report every successful run as failed.
        # NOTE(review): confirm against util.exec_host_command.
        if ret == 0 or isinstance(ret, str):
            nlog.info("removed pmd-rxq-affinity in rebalanced ports.")
        else:
            nlog.warning("removing pmd-rxq-affinity failed for some ports.")
            nlog.warning("you may check ovs-vsctl --no-wait %s" % cmd)

    # Saving the dump is best effort: a file system error must not
    # replace the shutdown exception raised below.
    try:
        # dirname is empty for a bare file name, which makedirs rejects.
        dump_dir = os.path.dirname(config.ncd_dump_file)
        if dump_dir:
            os.makedirs(dump_dir, exist_ok=True)

        with open(config.ncd_dump_file, 'w') as f:
            fh = ctx.log_handler
            json.dump(
                {
                    "time": "%s" % datetime.now(),
                    "version": netcontrold.__version__,
                    "events": ctx.events,
                    "config": {
                        "trace_mode": tctx.trace_mode,
                        "rebalance_mode": rctx.rebal_mode,
                        "verbose_log": fh.level == logging.DEBUG,
                    },
                }, f)
    except OSError as exc:
        nlog.warning("unable to save configuration and data in %s: %s" %
                     (config.ncd_dump_file, exc))
    else:
        nlog.info("saved current configuration and data in %s" %
                  config.ncd_dump_file)

    raise error.NcdShutdownExc
Beispiel #3
0
def ncd_kill(signal, frame):
    """
    Signal handler: undo rebalance settings in the vswitch and stop ncd.

    Removes the pmd-rxq-affinity setting from every port flagged as
    rebalanced and then raises NcdShutdownExc.

    Parameters
    ----------
    signal : int
        number of the signal received.
    frame : frame object
        stack frame interrupted by the signal (unused).

    Raises
    ------
    NcdShutdownExc
        always, once the clean up is done.
    """
    ctx = dataif.Context
    nlog.critical("Got signal %s, doing required clean up .." % signal)

    # reset rebalance settings in ports, skipping every port that we
    # did not rebalance.
    cmd = "".join(
        "-- remove Interface %s other_config pmd-rxq-affinity " % port_name
        for port_name, port in ctx.port_to_cls.items() if port.rebalance)

    if cmd:
        ret = util.exec_host_command("ovs-vsctl --no-wait %s" % cmd)
        # exec_host_command is used elsewhere as returning the command
        # output (text) on success and 1 on failure, so comparing with 0
        # alone would report every successful run as failed.
        # NOTE(review): confirm against util.exec_host_command.
        if ret == 0 or isinstance(ret, str):
            nlog.info("removed pmd-rxq-affinity in rebalanced ports.")
        else:
            nlog.warning("removing pmd-rxq-affinity failed for some ports.")
            nlog.warning("you may check ovs-vsctl --no-wait %s" % cmd)

    raise error.NcdShutdownExc
Beispiel #4
0
    def run(self):
        """
        Serve control commands on the ncd unix socket until shutdown.

        Binds a stream socket at config.ncd_socket and handles one
        client at a time: a command of up to 24 bytes is read and then
        dispatched. Mode switching commands are answered with
        "CTLD_ACK"; query commands are answered with a
        "CTLD_DATA_ACK <length>" header followed by the data itself.
        Unknown commands are logged and left unanswered.

        The loop re-checks self.ncd_shutdown only after a client was
        served, as accept() blocks in between.
        """
        sock_file = config.ncd_socket

        # remove a socket file left behind earlier; failing to unlink
        # matters only when the file really exists.
        try:
            os.unlink(sock_file)
        except OSError:
            if os.path.exists(sock_file):
                raise

        # dirname is empty for a bare file name, which makedirs rejects.
        sock_dir = os.path.dirname(sock_file)
        if sock_dir:
            os.makedirs(sock_dir, exist_ok=True)

        def send_data(conn, text):
            # length in the header counts encoded bytes, as that is what
            # the client has to read from the socket.
            payload = text.encode()
            conn.sendall(b"CTLD_DATA_ACK %6d" % len(payload))
            conn.sendall(payload)

        def switch_mode(conn, holder, attr, enable, label):
            # turn boolean mode "attr" of the context class on or off.
            state = "on" if enable else "off"
            if bool(getattr(holder, attr)) != enable:
                nlog.info("turning %s %s .." % (state, label))
                setattr(holder, attr, enable)
            else:
                nlog.info("%s already %s ..!" % (label, state))

            conn.sendall(b"CTLD_ACK")

        def switch_verbose(conn, fh, enable):
            # verbose mode is the file log handler being at DEBUG level;
            # a level is changed only when at the opposite one.
            if enable:
                state, cur_level, new_level = (
                    "on", logging.INFO, logging.DEBUG)
            else:
                state, cur_level, new_level = (
                    "off", logging.DEBUG, logging.INFO)

            if fh.level == cur_level:
                nlog.info("turning %s verbose mode .." % state)
                fh.setLevel(new_level)
            else:
                nlog.info("verbose mode already %s ..!" % state)

            conn.sendall(b"CTLD_ACK")

        def on_off(flag):
            return " on\n" if flag else " off\n"

        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            nlog.info("starting ctld on %s" % sock_file)
            sock.bind(sock_file)
            sock.listen(1)

            while not self.ncd_shutdown.is_set():
                conn, _client = sock.accept()

                try:
                    cmd = conn.recv(24).decode()

                    ctx = dataif.Context
                    rctx = RebalContext
                    tctx = TraceContext

                    if cmd == 'CTLD_TRACE_ON':
                        switch_mode(conn, tctx, 'trace_mode', True,
                                    "trace mode")

                    elif cmd == 'CTLD_TRACE_OFF':
                        switch_mode(conn, tctx, 'trace_mode', False,
                                    "trace mode")

                    elif cmd == 'CTLD_REBAL_ON':
                        switch_mode(conn, rctx, 'rebal_mode', True,
                                    "rebalance mode")

                    elif cmd == 'CTLD_REBAL_OFF':
                        switch_mode(conn, rctx, 'rebal_mode', False,
                                    "rebalance mode")

                    elif cmd == 'CTLD_REBAL_QUICK_ON':
                        switch_mode(conn, rctx, 'rebal_quick', True,
                                    "rebalance quick mode")

                    elif cmd == 'CTLD_REBAL_QUICK_OFF':
                        switch_mode(conn, rctx, 'rebal_quick', False,
                                    "rebalance quick mode")

                    elif cmd == 'CTLD_VERBOSE_ON':
                        switch_verbose(conn, ctx.log_handler, True)

                    elif cmd == 'CTLD_VERBOSE_OFF':
                        switch_verbose(conn, ctx.log_handler, False)

                    elif cmd == 'CTLD_REBAL_CNT':
                        n = 0
                        if rctx.rebal_mode:
                            n = len(rctx.rebal_stat)
                            n += sum(1 for (_x, y, _z) in ctx.events
                                     if y == 'rebalance')

                        send_data(conn, str(n))

                    elif cmd == 'CTLD_CONFIG':
                        fh = ctx.log_handler
                        status = "".join([
                            "trace mode:", on_off(tctx.trace_mode),
                            "rebalance mode:", on_off(rctx.rebal_mode),
                            "rebalance quick:", on_off(rctx.rebal_quick),
                            "verbose log:",
                            on_off(fh.level == logging.DEBUG),
                        ])

                        send_data(conn, status)

                    elif cmd == 'CTLD_STATUS':
                        rows = [
                            "%-16s | %-12s | %s\n" % ('Interface', 'Event',
                                                      'Time stamp'),
                            ('-' * 17) + '+' + ('-' * 14) + '+' +
                            ('-' * 28) + '\n',
                        ]
                        rows.extend("%-16s | %-12s | %s\n" % (x, y, z)
                                    for (x, y, z) in ctx.events)

                        send_data(conn, "".join(rows))

                    elif cmd == 'CTLD_VERSION':
                        status = "netcontrold v%s\n" % netcontrold.__version__
                        ret = util.exec_host_command("ovs-vsctl -V")

                        # the command returns 1 on failure; output in an
                        # unexpected format is reported as unknown too.
                        parse = None
                        if isinstance(ret, str):
                            parse = re.match(
                                r"ovs-vsctl \(Open vSwitch\) (.*?)\n", ret)

                        if parse:
                            status += "openvswitch v%s\n" % parse.group(1)
                        else:
                            status += "openvswitch (unknown)\n"

                        send_data(conn, status)

                    else:
                        nlog.info("unknown control command %s" % cmd)

                except (OSError, UnicodeDecodeError) as exc:
                    # one misbehaving client (early disconnect, garbage
                    # input) must not end the control thread.
                    nlog.warning("ctld dropped a client request: %s" % exc)

                finally:
                    conn.close()
        finally:
            sock.close()

        return
Beispiel #5
0
def ncd_main(argv):
    """
    Entry point of ncd: parse options, set up logging and the ctld
    thread, then sample and rebalance in an endless loop.

    Every round of the loop collects samples and logs the load of each
    pmd and the load variance. In trace mode, the trace callback is run
    for the pmds of ports whose drop or tx retry stats are above the
    configured minimum. In rebalance mode, rebalance dry-runs are
    performed and the result is applied in the vswitch once the load
    variance improved by more than config.ncd_pmd_load_improve_min
    percent and the minimum rebalance interval has passed.

    Parameters
    ----------
    argv : list
        command line arguments, without the program name.

    Raises
    ------
    SystemExit
        on invalid input, when the ctld thread could not be started,
        or once NcdShutdownExc is received from the signal handler.
    """
    # input options
    argpobj = argparse.ArgumentParser(
        prog='ncd.py', description='control network load on pmd')

    argpobj.add_argument('-s',
                         '--sample-interval',
                         type=int,
                         default=10,
                         help='seconds between each sampling (default: 10)')

    argpobj.add_argument(
        '-t',
        '--trace',
        required=False,
        action='store_true',
        default=False,
        help='operate in trace mode',
    )

    argpobj.add_argument('--trace-cb',
                         type=str,
                         default='ncd_cb_pktdrop',
                         help='trace mode callback '
                         '(default: ncd_cb_pktdrop)')

    # NOTE(review): store_true together with default=True makes
    # args.rebalance True whether or not -r is given.
    argpobj.add_argument(
        '-r',
        '--rebalance',
        required=False,
        action='store_true',
        default=True,
        help="operate in rebalance mode",
    )

    argpobj.add_argument('--rebalance-interval',
                         type=int,
                         default=60,
                         help='seconds between each re-balance '
                         '(default: 60)')

    argpobj.add_argument('--rebalance-n',
                         type=int,
                         default=1,
                         help='rebalance dry-runs at the max (default: 1)')

    argpobj.add_argument('--rebalance-iq',
                         action='store_true',
                         default=False,
                         help='rebalance by iterative queues logic '
                         '(default: False)')

    argpobj.add_argument('-q',
                         '--quiet',
                         action='store_true',
                         default=False,
                         help='no logging in terminal (default: False)')

    argpobj.add_argument('-v',
                         '--verbose',
                         action='store_true',
                         default=False,
                         help='trace logging (default: False)')

    args = argpobj.parse_args(argv)

    # check input to ncd
    ncd_trace = args.trace
    ncd_trace_cb = args.trace_cb
    ncd_rebal = args.rebalance

    if ncd_trace and not util.exists(ncd_trace_cb):
        print("no such program %s exists!" % ncd_trace_cb)
        sys.exit(1)

    # the sample interval is used as a divisor below, so zero (or a
    # negative number of seconds) can not be accepted.
    if args.sample_interval < 1:
        print("sample interval must be a positive number of seconds!")
        sys.exit(1)

    # set verbose level
    # dirname is empty for a bare file name, which makedirs rejects.
    log_dir = os.path.dirname(config.ncd_log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    fh = RotatingFileHandler(config.ncd_log_file,
                             maxBytes=(config.ncd_log_max_KB * 1024),
                             backupCount=config.ncd_log_max_backup_n)

    fh_fmt = logging.Formatter(
        "%(asctime)s|%(name)s|%(levelname)s|%(message)s")
    fh.setFormatter(fh_fmt)
    if args.verbose:
        fh.setLevel(logging.DEBUG)
    else:
        fh.setLevel(logging.INFO)

    ch = logging.StreamHandler(sys.stdout)
    ch_fmt = logging.Formatter("%(message)s")
    ch.setFormatter(ch_fmt)
    ch.setLevel(logging.INFO)

    # the logger is published as module global and through the dataif
    # context.
    global nlog
    nlog = logging.getLogger('ncd')
    nlog.setLevel(logging.DEBUG)
    nlog.addHandler(fh)
    if not args.quiet:
        nlog.addHandler(ch)

    ctx = dataif.Context
    ctx.nlog = nlog
    ctx.log_handler = fh
    pmd_map = ctx.pmd_map

    # set sampling interval to collect data
    ncd_sample_interval = args.sample_interval

    # set interval between each re-balance
    ncd_rebal_interval = args.rebalance_interval

    # set rebalance dryrun count
    ncd_rebal_n = args.rebalance_n

    # set iterative queue rebalance algorithm
    ncd_iq_rebal = args.rebalance_iq

    # set rebalance method.
    if ncd_iq_rebal:
        rebalance_dryrun = dataif.rebalance_dryrun_by_iq
    else:
        # round robin logic to rebalance.
        rebalance_dryrun = dataif.rebalance_dryrun_by_cyc

        # restrict only one dry run for cycles based mode.
        ncd_rebal_n = 1

    # set check point to call rebalance in vswitch
    rctx = RebalContext
    rctx.rebal_tick_n = ncd_rebal_interval / ncd_sample_interval

    # rebalance dryrun iteration
    rebal_i = 0

    if ncd_rebal:
        # adjust length of the samples counter
        config.ncd_samples_max = min(ncd_rebal_interval / ncd_sample_interval,
                                     config.ncd_samples_max)

        rctx.rebal_mode = True
        rctx.rebal_quick = True

    config.ncd_samples_max = int(config.ncd_samples_max)

    # set signal handler to abort ncd
    signal.signal(signal.SIGINT, ncd_kill)
    signal.signal(signal.SIGTERM, ncd_kill)

    tctx = TraceContext
    if ncd_trace:
        tctx.trace_mode = True

    # start ctld thread to monitor control command and dispatch
    # necessary action.
    shutdown_event = threading.Event()
    tobj = CtlDThread(shutdown_event)
    tobj.daemon = True

    try:
        tobj.start()
    except threading.ThreadError:
        nlog.info("failed to start ctld thread ..")
        sys.exit(1)

    prev_var = 0
    cur_var = 0
    ncd_samples_max = config.ncd_samples_max
    min_sample_i = 0

    # begin rebalance dry run. The loop is left only by an exception;
    # NcdShutdownExc ends the program through sys.exit below.
    while True:
        try:
            collect_data(ncd_samples_max, ncd_sample_interval)
            min_sample_i += ncd_samples_max

            nlog.info("current pmd load:")
            for pmd_id in sorted(pmd_map.keys()):
                pmd = pmd_map[pmd_id]
                nlog.info("pmd id %d load %d" % (pmd_id, pmd.pmd_load))

            cur_var = dataif.pmd_load_variance(pmd_map)
            nlog.info("current pmd load variance: %d" % cur_var)

            # do not trace if rebalance dry-run in progress.
            if tctx.trace_mode and not rebal_i:
                pmd_cb_list = []
                for pname in sorted(ctx.port_to_cls.keys()):
                    port = ctx.port_to_cls[pname]
                    drop = dataif.port_drop_ppm(port)
                    drop_min = config.ncd_cb_pktdrop_min
                    tx_retry = dataif.port_tx_retry(port)
                    do_cb = False
                    if drop[0] > drop_min:
                        nlog.info("port %s drop_rx %d ppm above %d ppm" %
                                  (port.name, drop[0], drop_min))
                        ctx.events.append((port.name, "rx_drop", ctx.last_ts))
                        do_cb = True

                    if drop[1] > drop_min:
                        nlog.info("port %s drop_tx %d ppm above %d ppm" %
                                  (port.name, drop[1], drop_min))
                        ctx.events.append((port.name, "tx_drop", ctx.last_ts))
                        do_cb = True

                    if tx_retry > config.ncd_samples_max:
                        nlog.info(
                            "port %s tx_retry %d above %d" %
                            (port.name, tx_retry, config.ncd_samples_max))
                        ctx.events.append((port.name, "tx_retry", ctx.last_ts))
                        do_cb = True

                    if not do_cb:
                        # no pmd needs to be traced for this port.
                        continue

                    for pmd_id in sorted(pmd_map.keys()):
                        pmd = pmd_map[pmd_id]
                        if (pmd.find_port_by_name(port.name)):
                            pmd_cb_list.insert(0, pmd_id)

                if (len(pmd_cb_list) > 0):
                    # every pmd is passed once to the callback.
                    pmds = " ".join(map(str, set(pmd_cb_list)))
                    cmd = "%s %s" % (ncd_trace_cb, pmds)
                    nlog.info("executing callback %s" % cmd)
                    data = util.exec_host_command(cmd)
                    nlog.info(data)

            if not rctx.rebal_mode:
                continue

            # At the minimum for deriving current load on pmds, all of
            # the sampling counters (of size config.ncd_samples_max) have
            # to be filled "every time" before other evaluations done.
            #
            # However, for quick rebalance, we fill all the counters
            # once, and then keep rolling with one counter across old
            # stats so that, we reduce time to sample before kicking off
            # rebalance (from sampling config.ncd_samples_max counters
            # to only one.
            #
            # As ovs internally refers all its 6 sample counters for any
            # stats we query, it is absolutely fine we roll with one
            # new sample and retain old n-1 samples to check for current
            # state of pmd and rxqs.
            #
            if rctx.rebal_quick and (min_sample_i >= config.ncd_samples_max):
                ncd_samples_max = 1
                min_sample_i = (config.ncd_samples_max - 1)
            else:
                ncd_samples_max = config.ncd_samples_max

            # dry-run pmd rebalance.
            if pmd_map and rebalance_dryrun(pmd_map):
                rebal_i += 1

            # restart sampling when no dry-run performed.
            if not rebal_i:
                nlog.info("no dryrun performed.")
                continue

            else:
                # compare previous and current state of pmds.
                prev_var = cur_var
                collect_data(ncd_samples_max, ncd_sample_interval)
                cur_var = dataif.pmd_load_variance(pmd_map)

                nlog.info("pmd load in dry run(%d):" % rebal_i)
                for pmd_id in sorted(pmd_map.keys()):
                    pmd = pmd_map[pmd_id]
                    nlog.info("pmd id %d load %d" % (pmd_id, pmd.pmd_load))

                nlog.info("pmd load variance: previous %d, in dry run(%d) %d" %
                          (prev_var, rebal_i, cur_var))

                # improvement is the drop of variance in percent of the
                # previous variance.
                if (cur_var < prev_var):
                    diff = (prev_var - cur_var) * 100 / prev_var
                    if diff > config.ncd_pmd_load_improve_min:
                        rctx.apply_rebal = True

                # check if we reached maximum allowable dry-runs.
                if rebal_i < ncd_rebal_n:
                    # continue for more dry runs.
                    continue

                # check if balance state of all pmds is reached
                if rctx.apply_rebal:
                    # check if rebalance call needed really.
                    if (rctx.rebal_tick > rctx.rebal_tick_n):
                        rctx.rebal_tick = 0
                        cmd = rebalance_switch(pmd_map)
                        ctx.events.append(("pmd", "rebalance", ctx.last_ts))
                        nlog.info(
                            "vswitch command for current optimization is: %s" %
                            cmd)
                        rctx.apply_rebal = False

                        # the command returns 1 on failure.
                        if (util.exec_host_command(cmd) == 1):
                            nlog.info("problem running this command.. "
                                      "check vswitch!")
                            now = datetime.now()
                            now_ts = now.strftime("%Y-%m-%d %H:%M:%S")
                            ctx.events.append(("switch", "error", now_ts))

                        # sleep for few seconds before thrashing current
                        # dry-run
                        nlog.info("waiting for %d seconds "
                                  "before new dry runs begin.." %
                                  config.ncd_vsw_wait_min)
                        time.sleep(config.ncd_vsw_wait_min)
                    else:
                        nlog.info("minimum rebalance interval not met!"
                                  " now at %d sec" %
                                  (rctx.rebal_tick * ncd_sample_interval))
                else:
                    nlog.info("no new optimization found ..")

                # reset collected data
                pmd_map.clear()
                ctx.port_to_cls.clear()
                ctx.port_to_id.clear()
                ncd_samples_max = config.ncd_samples_max
                rebal_i = 0
                min_sample_i = 0

                nlog.info("dry-run reset.")

        except error.NcdShutdownExc:
            # the ctld thread is a daemon thread, so it is not joined
            # here; setting the event and exiting is all that is needed.
            nlog.info("Exiting NCD ..")
            tobj.ncd_shutdown.set()
            sys.exit(1)
Beispiel #6
0
def get_port_stats():
    """
    Sample rx/tx packet and drop counters of every datapath port.

    The output of "ovs-appctl dpctl/show -s" is parsed line by line. A
    port line selects the port object, creating it on first sight or
    moving a known one to its next sampling slot; the RX and TX lines
    that follow fill the counters of that slot.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjModelExc
        if state of ports in switch differ.
    """
    nlog = Context.nlog

    # retrieve required data from the vswitch.
    data = util.exec_host_command("ovs-appctl dpctl/show -s")
    if not data:
        raise OsCommandExc("unable to collect data")

    port_pat = r'\s.*port\s(\d+):\s([A-Za-z0-9_-]+) *'
    rx_pat = r'\s.*RX packets:(\d+) .*? dropped:(\d+) *'
    tx_pat = r'\s.*TX packets:(\d+) .*? dropped:(\d+) *'

    # port names known before this sampling.
    ports_before = sorted(Context.port_to_cls.keys())

    # port that the RX/TX lines under parse belong to.
    port = None

    for line in data.splitlines():
        found = re.match(port_pat, line)
        if found:
            # port line carries port id and name.
            pid, pname = found.groups()
            Context.port_to_id[pname] = int(pid)

            if pname not in Context.port_to_cls:
                # first sampling of this port: create its entry.
                port = make_dataif_port(pname)
                port.id = pid
                nlog.debug("added port %s stats.." % pname)
                continue

            # in mid of sampling: store following stats in the next
            # sampling slot of the known port.
            port = Context.port_to_cls[pname]
            assert (port.id == pid)

            port.cyc_idx = (port.cyc_idx + 1) % config.ncd_samples_max
            nlog.debug("port %s in iteration %d" % (port.name, port.cyc_idx))
            continue

        found = re.match(rx_pat, line)
        if found:
            # receive counters of the current port.
            rx, drop = found.groups()
            port.rx_cyc[port.cyc_idx] = int(rx)
            port.rx_drop_cyc[port.cyc_idx] = int(drop)
            continue

        found = re.match(tx_pat, line)
        if found:
            # transmit counters of the current port.
            tx, drop = found.groups()
            port.tx_cyc[port.cyc_idx] = int(tx)
            port.tx_drop_cyc[port.cyc_idx] = int(drop)

    # port names known after this sampling.
    ports_after = sorted(Context.port_to_cls.keys())

    # skip modelling this object if states differ.
    if ports_before and ports_before != ports_after:
        raise ObjModelExc("ports count differ")

    return None
Beispiel #7
0
def get_pmd_rxqs(pmd_map):
    """
    Collect info on how rxq is pinned with pmd, from the vswitch.

    The output of "ovs-appctl dpif-netdev/pmd-rxq-show" is parsed line
    by line. A "pmd thread" line selects the pmd (which must already be
    in pmd_map), a port line updates one rxq of that pmd and any other
    line is expected to carry the isolated flag of the pmd.

    The usage of an rxq is reported in percent; it is converted into
    cpu cycles and received packets using the difference between the
    current and the previous sampling slot of its pmd.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.

    Returns
    -------
    dict
        the pmd_map that was passed in.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjConsistencyExc
        if state of pmds in ncd differ.
    ObjParseExc
        if unable to retrieve info from switch.
    ObjModelExc
        if state of pmds in switch differ.
    """

    nlog = Context.nlog

    # retrieve required data from the vswitch.
    cmd = "ovs-appctl dpif-netdev/pmd-rxq-show"
    data = util.exec_host_command(cmd)
    if not data:
        raise OsCommandExc("unable to collect data")

    # current state of pmds
    cur_pmd_l = sorted(pmd_map.keys())

    # current pmd object to be used in every line under parse.
    pmd = None

    for line in data.splitlines():
        if line.startswith('pmd thread'):
            # In below matching line, we retrieve numa id and core id
            # (aka pmd id).
            linesre = re.search(r'pmd thread numa_id (\d+) core_id (\d+):',
                                line)
            numa_id = int(linesre.groups()[0])
            core_id = int(linesre.groups()[1])
            if core_id not in pmd_map:
                raise ObjConsistencyExc(
                    "trying to add new pmd %d in mid of ncd!.. aborting! " %
                    core_id)
            pmd = pmd_map[core_id]
            assert (pmd.numa_id == numa_id)
            nlog.debug("pmd %d in iteration %d" % (pmd.id, pmd.cyc_idx))

        elif re.match(r'\s.*port: .*', line):
            # From this line, we retrieve cpu usage of rxq.
            linesre = re.search(
                r'\s.*port:\s([A-Za-z0-9_-]+)\s*'
                r'queue-id:\s*(\d+)\s*(?=\((enabled)\))?.*'
                r'pmd usage:\s*(\d+|NOT AVAIL)\s*?', line)

            # a port line in unexpected format is a parse error rather
            # than an attribute error on None.
            if not linesre:
                raise ObjParseExc("error parsing line %s" % line)

            (pname, qid, enabled_flag, qcpu) = linesre.groups()
            qid = int(qid)
            try:
                qcpu = int(qcpu)
            except ValueError:
                if (qcpu == 'NOT AVAIL'):
                    raise ObjParseExc("pmd usage unavailable for now")
                else:
                    raise ObjParseExc("error parsing line %s" % line)

            # get the Dataif_Port owning this rxq.
            port = pmd.find_port_by_name(pname)
            if not port:
                port = pmd.add_port(pname)

            # update port attributes now.
            port.id = Context.port_to_id[pname]
            port.numa_id = pmd.numa_id

            port_cls = Context.port_to_cls[pname]
            port_cls.rebalance = True

            # check whether this rxq was being rebalanced.
            if qid in port.rxq_rebalanced:
                raise ObjConsistencyExc(
                    "stale %s object found while parsing rxq in pmd .." %
                    pname)

            # skip updating zero index as rxq counters works only
            # with the differences between pmd stats in every
            # sampling slot.
            if pmd.cyc_idx == 0:
                continue

            # port not in rebalancing state, so update rxq for its
            # cpu cycles consumed by it.
            rxq = (port.find_rxq_by_id(qid) or port.add_rxq(qid))
            rxq.pmd = pmd
            rxq.port = port
            cur_idx = pmd.cyc_idx
            prev_idx = (cur_idx - 1) % config.ncd_samples_max
            rx_diff = pmd.rx_cyc[cur_idx] - pmd.rx_cyc[prev_idx]
            pcpu_diff = pmd.proc_cpu_cyc[cur_idx] - pmd.proc_cpu_cyc[
                prev_idx]
            icpu_diff = pmd.idle_cpu_cyc[cur_idx] - pmd.idle_cpu_cyc[
                prev_idx]
            cpu_diff = pcpu_diff + icpu_diff

            # qcpu is a percentage of what the pmd did in this slot.
            qcpu_diff = int((qcpu * cpu_diff) / 100)
            qrx_diff = int((qcpu * rx_diff) / 100)

            rxq.cpu_cyc[pmd.cyc_idx] = qcpu_diff
            rxq.rx_cyc[pmd.cyc_idx] = qrx_diff
            rxq.enabled = (enabled_flag == "enabled")
        else:
            # From other line, we retrieve isolated flag.
            # NOTE(review): any other kind of line trips the assertion
            # below - confirm against the output format of the vswitch
            # version in use.
            (sname, sval) = line.split(":")
            sname = re.sub(r"^\s+", "", sname)
            assert (sname == 'isolated ')
            pmd.isolated = {'true': True, 'false': False}[sval[1:]]

    # new state of pmds.
    new_pmd_l = sorted(pmd_map.keys())

    # skip modelling this object if states differ.
    if len(cur_pmd_l) > 0 and cur_pmd_l != new_pmd_l:
        raise ObjModelExc("pmds count differ")

    return pmd_map
Beispiel #8
0
def get_pmd_stats(pmd_map):
    """
    Collect stats on every pmd running in the system and update
    pmd_map. In every sampling iteration, these stats are stored
    in corresponding sampling slots.

    The output of "ovs-appctl dpif-netdev/pmd-stats-show" is parsed
    line by line up to the "main thread" section. A "pmd thread" line
    selects the pmd, creating it on first sight or moving a known one
    to its next sampling slot; the lines that follow fill received
    packets, idle cycles and processing cycles of that slot.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.

    Returns
    -------
    dict
        the pmd_map that was passed in.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjConsistencyExc
        if state of pmds in ncd differ.
    ObjModelExc
        if state of pmds in switch differ.
    """

    nlog = Context.nlog

    # retrieve required data from the vswitch.
    cmd = "ovs-appctl dpif-netdev/pmd-stats-show"
    data = util.exec_host_command(cmd)
    if not data:
        raise OsCommandExc("unable to collect data")

    # current state of pmds
    cur_pmd_l = sorted(pmd_map.keys())

    # current pmd object to be used in every line under parse.
    pmd = None

    for line in data.splitlines():
        if line.startswith("pmd thread"):
            # In below matching line, we retrieve numa id and core id
            # (aka pmd id).
            linesre = re.search(r'pmd thread numa_id (\d+) core_id (\d+):',
                                line)
            numa_id = int(linesre.groups()[0])
            core_id = int(linesre.groups()[1])

            # If in mid of sampling, we should have pmd_map having
            # entry for this core id.
            if core_id in pmd_map:
                pmd = pmd_map[core_id]

                # Check to ensure we are good to go as local should
                # always be used.
                assert (pmd.numa_id == numa_id)

                # Store following stats in new sampling slot.
                pmd.cyc_idx = (pmd.cyc_idx + 1) % config.ncd_samples_max
                nlog.debug("pmd %d in iteration %d" % (pmd.id, pmd.cyc_idx))
            else:
                # Very first sampling for each pmd occur in this
                # clause. Just ensure, no new pmd is added from system
                # reconfiguration.
                if len(pmd_map) != 0 and not pmd:
                    raise ObjConsistencyExc(
                        "trying to add new pmd %d in mid of ncd!.. aborting! "
                        % core_id)

                # create new entry in pmd_map for this pmd.
                pmd = Dataif_Pmd(core_id)
                pmd_map[pmd.id] = pmd
                nlog.debug("added pmd %s stats.." % pmd.id)

                # numa id of pmd is of core's.
                pmd.numa_id = numa_id
        elif line.startswith("main thread"):
            # end of pmd stats
            break
        else:
            # From other lines, we retrieve stats of the pmd. Only the
            # first word of the value is used.
            (sname, sval) = line.split(":")
            sname = re.sub(r"^\s+", "", sname)
            sval = sval[1:].split()
            if sname == "packets received":
                pmd.rx_cyc[pmd.cyc_idx] = int(sval[0])
            elif sname == "idle cycles":
                pmd.idle_cpu_cyc[pmd.cyc_idx] = int(sval[0])
            elif sname == "processing cycles":
                pmd.proc_cpu_cyc[pmd.cyc_idx] = int(sval[0])

    # new state of pmds.
    new_pmd_l = sorted(pmd_map.keys())

    # skip modelling this object if states differ.
    if len(cur_pmd_l) > 0 and cur_pmd_l != new_pmd_l:
        raise ObjModelExc("pmds count differ")

    return pmd_map
Beispiel #9
0
def get_interface_stats():
    """
    Collect retry stats on every applicable port in the datapath.
    In every sampling iteration, these stats are stored
    in corresponding sampling slots.

    Only ports that are already present in Context.port_to_cls are
    updated. For each of them the interface type is recorded in
    port.type, and the 'tx_retries' counter (when reported) is stored
    in port.tx_retry_cyc at the port's current sampling index.

    Returns
    -------
    None

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjModelExc
        if state of ports in switch differ.
    """

    nlog = Context.nlog

    # retrieve required data from the vswitch.
    cmd = "ovs-vsctl list interface"
    data = util.exec_host_command(cmd)
    if not data:
        raise OsCommandExc("unable to collect data")

    # current state of ports
    cur_port_l = sorted(Context.port_to_cls.keys())

    # patterns for the columns of interest; compiled once, outside the loop.
    name_re = re.compile(r'\s*name\s.*:\s"*([A-Za-z0-9_-]+)"*')
    type_re = re.compile(r'\s*type\s.*:\s([a-z]+)')
    stats_re = re.compile(r'\s*statistics\s.*:\s{(.*)}')

    # current port object to be used in every line under parse.
    port = None

    for line in data.splitlines():
        name_m = name_re.match(line)
        if name_m:
            # In this line, we retrieve the port name.
            pname = name_m.group(1)

            # If in mid of sampling, we should have port_to_cls having
            # entry for this port name. Unknown ports leave the current
            # port untouched.
            if pname in Context.port_to_cls:
                port = Context.port_to_cls[pname]

                nlog.debug("port %s in iteration %d" %
                           (port.name, port.cyc_idx))
            continue

        type_m = type_re.match(line)
        if type_m:
            if not port:
                continue

            # From this line, we retrieve the type of the port.
            port.type = type_m.group(1)

            # Stop applying further lines to this port until the next
            # matching name line is seen.
            port = None
            continue

        stats_m = stats_re.match(line)
        if stats_m:
            if not port:
                continue

            # From this line, we retrieve stats of the port, given as
            # a comma separated list of key=value items. Items without
            # '=' (e.g. the single empty item of an empty "{}" map) are
            # skipped instead of raising IndexError.
            dval = {}
            for sub in stats_m.group(1).split(", "):
                fields = sub.split("=")
                if len(fields) < 2:
                    continue
                dval[fields[0]] = fields[1]

            if 'tx_retries' in dval:
                port.tx_retry_cyc[port.cyc_idx] = int(dval['tx_retries'])

    # new state of ports.
    new_port_l = sorted(Context.port_to_cls.keys())

    # skip modelling this object if states differ.
    if len(cur_port_l) > 0 and cur_port_l != new_port_l:
        raise ObjModelExc("ports count differ")

    return None
Beispiel #10
0
def get_pmd_rxqs(pmd_map):
    """
    Collect info on how rxq is pinned with pmd, from the vswitch.

    For every rxq reported by the switch, the cpu cycles it consumed in
    the current sampling slot of its pmd are stored in the rxq object.
    When the rxq is marked as rebalanced in its port, the estimated
    cycles and packet count are additionally moved from the current
    pmd to the rebalancing pmd.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.

    Returns
    -------
    dict
        the same pmd_map that was passed in.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjConsistencyExc
        if state of pmds in ncd differ.
    ObjParseExc
        if unable to retrieve info from switch.
    ObjModelExc
        if state of pmds in switch differ.
    """

    nlog = Context.nlog

    # retrieve required data from the vswitch.
    cmd = "ovs-appctl dpif-netdev/pmd-rxq-show"
    data = util.exec_host_command(cmd)
    if not data:
        raise OsCommandExc("unable to collect data")

    # current state of pmds
    cur_pmd_l = sorted(pmd_map.keys())

    # current pmd object to be used in every line under parse.
    pmd = None

    for line in data.splitlines():
        if line.startswith('pmd thread'):
            # In below matching line, we retrieve numa id and core id
            # (aka pmd id).
            linesre = re.search(r'pmd thread numa_id (\d+) core_id (\d+):',
                                line)
            if not linesre:
                raise ObjParseExc("error parsing line %s" % line)

            numa_id = int(linesre.group(1))
            core_id = int(linesre.group(2))
            if core_id not in pmd_map:
                raise ObjConsistencyExc(
                    "trying to add new pmd %d in mid of ncd!.. aborting! " %
                    core_id)
            pmd = pmd_map[core_id]
            assert(pmd.numa_id == numa_id)
            nlog.debug("pmd %d in iteration %d" % (pmd.id, pmd.cyc_idx))

        elif re.match(r'\s.*port: .*', line):
            # From this line, we retrieve cpu usage of rxq.
            linesre = re.search(r'\s.*port:\s([A-Za-z0-9_-]+)\s*'
                                r'queue-id:\s*(\d+)\s*'
                                r'pmd usage:\s*(\d+|NOT AVAIL)\s*?',
                                line)
            if not linesre:
                # line looked like a rxq entry but is not in the layout
                # we understand; report it rather than failing on None.
                raise ObjParseExc("error parsing line %s" % line)

            pname, qid, qcpu = linesre.groups()
            qid = int(qid)
            if qcpu == 'NOT AVAIL':
                raise ObjParseExc("pmd usage unavailable for now")
            # the pattern only admits digits here.
            qcpu = int(qcpu)

            # get the Dataif_Port owning this rxq.
            port = pmd.find_port_by_name(pname)
            if not port:
                port = pmd.add_port(pname)

            # update port attributes now.
            port.id = Context.port_to_id[pname]
            port.numa_id = pmd.numa_id

            port_cls = Context.port_to_cls[pname]
            port_cls.rebalance = True

            # check whether this rxq was being rebalanced.
            if qid in port.rxq_rebalanced:
                # In dry-run, we need to update cpu cycles consumed by
                # this rxq (through current pmd), into the processing
                # cycles of the rebalancing pmd. Then the load of the
                # rebalancing pmd could be estimated appropriately.
                reb_pmd_id = port.rxq_rebalanced[qid]
                reb_pmd = pmd_map[reb_pmd_id]
                reb_port = reb_pmd.find_port_by_name(port.name)
                rxq = reb_port.find_rxq_by_id(qid)
                # qcpu is in percentage in this data, so we convert it
                # into actual cycles using processing cycles that this
                # pmd consumed.
                # qrx is approximate count of packets that this rxq
                # received.
                cur_idx = pmd.cyc_idx
                prev_idx = (cur_idx - 1) % config.ncd_samples_max
                rx_diff = pmd.rx_cyc[cur_idx] - pmd.rx_cyc[prev_idx]
                cpu_diff = pmd.proc_cpu_cyc[
                    cur_idx] - pmd.proc_cpu_cyc[prev_idx]
                qrx = (qcpu * rx_diff) / 100
                qcpu = (qcpu * cpu_diff) / 100
                # update rebalancing pmd for cpu cycles and rx count.
                reb_pmd.proc_cpu_cyc[cur_idx] += qcpu
                reb_pmd.idle_cpu_cyc[cur_idx] -= qcpu
                reb_pmd.rx_cyc[cur_idx] += qrx
                # update current pmd for cpu cycles and rx count.
                pmd.proc_cpu_cyc[cur_idx] -= qcpu
                pmd.idle_cpu_cyc[cur_idx] += qcpu
                pmd.rx_cyc[cur_idx] -= qrx
            else:
                # port not in rebalancing state, so update rxq for its
                # cpu cycles consumed by it.
                rxq = (port.find_rxq_by_id(qid) or port.add_rxq(qid))
                rxq.pmd = pmd
                rxq.port = port
                cur_idx = pmd.cyc_idx
                prev_idx = (cur_idx - 1) % config.ncd_samples_max
                cpu_diff = pmd.proc_cpu_cyc[
                    cur_idx] - pmd.proc_cpu_cyc[prev_idx]
                # convert usage percentage into actual cycles.
                qcpu = (qcpu * cpu_diff) / 100

            rxq.cpu_cyc[pmd.cyc_idx] = qcpu
        else:
            # From other line, we retrieve isolated flag.
            (sname, sval) = line.split(":")
            sname = re.sub(r"^\s+", "", sname)
            assert(sname == 'isolated ')
            # value follows the colon after a single leading character.
            pmd.isolated = {'true': True, 'false': False}[sval[1:]]

    # new state of pmds.
    new_pmd_l = sorted(pmd_map.keys())

    # skip modelling this object if states differ.
    if len(cur_pmd_l) > 0 and cur_pmd_l != new_pmd_l:
        raise ObjModelExc("pmds count differ")

    return pmd_map