def add_rxq(self, _id):
    """
    Add new Dataif_Rxq object for this id in port.rxq_map, if one
    is not already available.

    Parameters
    ----------
    _id : int
        id of rxq to be added.
    """

    # check if this rxq is already available.
    rxq = self.find_rxq_by_id(_id)
    if rxq:
        raise ObjConsistencyExc(
            "rxq %d already exists in %s" % (_id, self.name))

    # create new rxq and add it in our rxq_map.
    rxq = Dataif_Rxq(_id)
    self.rxq_map[_id] = rxq

    # remember the port this rxq is tied with.
    rxq.port = self

    return rxq

def add_port(self, name, _id=None, numa_id=None):
    """
    Add new Dataif_Port for this name in pmd.port_map, if one
    is not already available.

    Parameters
    ----------
    name : str
        name of the port to be added.
    _id : int, optional
        id of the port (default is None)
    numa_id : int, optional
        numa id associated with this port (default is None)
    """

    # check if a port of this name already exists.
    port = self.find_port_by_name(name)
    if port:
        raise ObjConsistencyExc(
            "port %s already exists in pmd %d" % (name, self.id))

    # create new port and add it in port_map.
    port_cls = Context.port_to_cls[name]
    port = port_cls()
    self.port_map[name] = port

    # store other input options.
    # TODO: port numa could actually be from sysfs to avoid
    #       any configuration fault.
    port.id = _id
    port.numa_id = numa_id

    return port

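# A minimal usage sketch (illustrative only) of how a caller wires a port
# and an rxq through the helpers above. The "dpdk0" name and the direct
# Dataif_Port registration in Context.port_to_cls are hypothetical
# stand-ins; add_port() requires such a registration to exist before it
# is called.
def _demo_add_port_rxq(pmd):
    Context.port_to_cls["dpdk0"] = Dataif_Port
    port = pmd.add_port("dpdk0", _id=1, numa_id=0)
    rxq = port.add_rxq(0)
    assert pmd.find_port_by_name("dpdk0") is port
    assert port.find_rxq_by_id(0) is rxq
    return port, rxq
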
def del_port(self, name):
    """
    Delete Dataif_Port object of this name from pmd.port_map.

    Parameters
    ----------
    name : str
        name of the port to be deleted.

    Raises
    ------
    ObjConsistencyExc
        if no such port is available.
    """

    # check if port of this name is already available.
    port = self.find_port_by_name(name)
    if not port:
        raise ObjConsistencyExc("port %s not found" % name)

    # remove this port from port map.
    self.port_map.pop(name, None)

def del_rxq(self, _id):
    """
    Delete Dataif_Rxq object of this id from port.rxq_map.

    Parameters
    ----------
    _id : int
        id of rxq to be deleted.

    Raises
    ------
    ObjConsistencyExc
        if no such rxq is available.
    """

    # check if this rxq is already available.
    rxq = self.find_rxq_by_id(_id)
    if not rxq:
        raise ObjConsistencyExc("rxq %d not found" % _id)

    # remove rxq from its map.
    self.rxq_map.pop(_id, None)

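# Deletion sketch: del_port()/del_rxq() raise ObjConsistencyExc for an
# absent object, so a caller unsure of current state can probe first with
# the find_* helpers. This guard-then-delete pattern is a suggestion, not
# something this module mandates.
def _demo_del_port_rxq(pmd, port):
    if port.find_rxq_by_id(0):
        port.del_rxq(0)
    if pmd.find_port_by_name(port.name):
        pmd.del_port(port.name)
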
def get_pmd_rxqs(pmd_map):
    """
    Collect info on how rxq is pinned with pmd, from the vswitch.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjConsistencyExc
        if state of pmds in ncd differ.
    ObjParseExc
        if unable to retrieve info from switch.
    ObjModelExc
        if state of pmds in switch differ.
    """

    nlog = Context.nlog

    # retrieve required data from the vswitch.
    cmd = "ovs-appctl dpif-netdev/pmd-rxq-show"
    data = util.exec_host_command(cmd)
    if not data:
        raise OsCommandExc("unable to collect data")

    # current state of pmds
    cur_pmd_l = sorted(pmd_map.keys())

    # sname and sval store parsed string's key and value.
    sname, sval = None, None

    # current pmd object to be used in every line under parse.
    pmd = None

    for line in data.splitlines():
        if line.startswith('pmd thread'):
            # From the matching line below, we retrieve numa id and
            # core id (aka pmd id).
            linesre = re.search(r'pmd thread numa_id (\d+) core_id (\d+):',
                                line)
            numa_id = int(linesre.groups()[0])
            core_id = int(linesre.groups()[1])
            if core_id not in pmd_map:
                raise ObjConsistencyExc(
                    "trying to add new pmd %d in mid of ncd!.. aborting!"
                    % core_id)
            pmd = pmd_map[core_id]
            assert (pmd.numa_id == numa_id)
            nlog.debug("pmd %d in iteration %d" % (pmd.id, pmd.cyc_idx))

        elif re.match(r'\s.*port: .*', line):
            # From this line, we retrieve cpu usage of rxq.
            linesre = re.search(
                r'\s.*port:\s([A-Za-z0-9_-]+)\s*'
                r'queue-id:\s*(\d+)\s*(?=\((enabled)\))?.*'
                r'pmd usage:\s*(\d+|NOT AVAIL)\s*?',
                line)

            pname = linesre.groups()[0]
            qid = int(linesre.groups()[1])
            enabled_flag = linesre.groups()[2]
            try:
                qcpu = int(linesre.groups()[3])
            except ValueError:
                qcpu = linesre.groups()[3]
                if (qcpu == 'NOT AVAIL'):
                    raise ObjParseExc("pmd usage unavailable for now")
                else:
                    raise ObjParseExc("error parsing line %s" % line)

            # get the Dataif_Port owning this rxq.
            port = pmd.find_port_by_name(pname)
            if not port:
                port = pmd.add_port(pname)

            # update port attributes now.
            port.id = Context.port_to_id[pname]
            port.numa_id = pmd.numa_id
            port_cls = Context.port_to_cls[pname]
            port_cls.rebalance = True

            # check whether this rxq was being rebalanced.
            if qid in port.rxq_rebalanced:
                raise ObjConsistencyExc(
                    "stale %s object found while parsing rxq in pmd .."
                    % pname)
            else:
                # skip updating zero index as rxq counters work only
                # with the differences between pmd stats in every
                # sampling slot.
                if pmd.cyc_idx == 0:
                    continue

                # port not in rebalancing state, so update rxq for
                # the cpu cycles consumed by it.
                rxq = (port.find_rxq_by_id(qid) or port.add_rxq(qid))
                rxq.pmd = pmd
                rxq.port = port
                cur_idx = pmd.cyc_idx
                prev_idx = (cur_idx - 1) % config.ncd_samples_max
                rx_diff = pmd.rx_cyc[cur_idx] - pmd.rx_cyc[prev_idx]
                pcpu_diff = pmd.proc_cpu_cyc[cur_idx] \
                    - pmd.proc_cpu_cyc[prev_idx]
                icpu_diff = pmd.idle_cpu_cyc[cur_idx] \
                    - pmd.idle_cpu_cyc[prev_idx]
                cpu_diff = pcpu_diff + icpu_diff
                qcpu_diff = int((qcpu * cpu_diff) / 100)
                qrx_diff = int((qcpu * rx_diff) / 100)
                rxq.cpu_cyc[pmd.cyc_idx] = qcpu_diff
                rxq.rx_cyc[pmd.cyc_idx] = qrx_diff

            if (enabled_flag == "enabled"):
                rxq.enabled = True
            else:
                rxq.enabled = False
        else:
            # From any other line, we retrieve the isolated flag.
            (sname, sval) = line.split(":")
            sname = re.sub(r"^\s+", "", sname)
            assert (sname == 'isolated ')
            pmd.isolated = {'true': True, 'false': False}[sval[1:]]

    # new state of pmds.
    new_pmd_l = sorted(pmd_map.keys())

    # skip modelling this object if states differ.
    if len(cur_pmd_l) > 0 and cur_pmd_l != new_pmd_l:
        raise ObjModelExc("pmds count differ")

    return pmd_map

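# Standalone parse check for the rxq line format get_pmd_rxqs() expects.
# The sample line is illustrative of "ovs-appctl dpif-netdev/pmd-rxq-show"
# output, not captured from a real switch.
def _demo_parse_rxq_line():
    import re
    line = "  port: dpdk0        queue-id:  0 (enabled)   pmd usage: 30 %"
    linesre = re.search(
        r'\s.*port:\s([A-Za-z0-9_-]+)\s*'
        r'queue-id:\s*(\d+)\s*(?=\((enabled)\))?.*'
        r'pmd usage:\s*(\d+|NOT AVAIL)\s*?',
        line)
    # -> ('dpdk0', '0', 'enabled', '30')
    return linesre.groups()
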
def get_pmd_stats(pmd_map):
    """
    Collect stats on every pmd running in the system and update
    pmd_map. In every sampling iteration, these stats are stored
    in corresponding sampling slots.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjConsistencyExc
        if state of pmds in ncd differ.
    ObjModelExc
        if state of pmds in switch differ.
    """

    nlog = Context.nlog

    # retrieve required data from the vswitch.
    cmd = "ovs-appctl dpif-netdev/pmd-stats-show"
    data = util.exec_host_command(cmd)
    if not data:
        raise OsCommandExc("unable to collect data")

    # current state of pmds
    cur_pmd_l = sorted(pmd_map.keys())

    # sname and sval store parsed string's key and value.
    sname, sval = None, None

    # current pmd object to be used in every line under parse.
    pmd = None

    for line in data.splitlines():
        if line.startswith("pmd thread"):
            # From the matching line below, we retrieve numa id and
            # core id (aka pmd id).
            linesre = re.search(r'pmd thread numa_id (\d+) core_id (\d+):',
                                line)
            numa_id = int(linesre.groups()[0])
            core_id = int(linesre.groups()[1])

            # If in mid of sampling, pmd_map should already have an
            # entry for this core id.
            if core_id in pmd_map:
                pmd = pmd_map[core_id]

                # Check to ensure we are good to go as local should
                # always be used.
                assert (pmd.numa_id == numa_id)

                # Store following stats in new sampling slot.
                pmd.cyc_idx = (pmd.cyc_idx + 1) % config.ncd_samples_max
                nlog.debug("pmd %d in iteration %d" % (pmd.id, pmd.cyc_idx))
            else:
                # Very first sampling for each pmd occurs in this
                # clause. Just ensure no new pmd is added from system
                # reconfiguration.
                if len(pmd_map) != 0 and not pmd:
                    raise ObjConsistencyExc(
                        "trying to add new pmd %d in mid of ncd!.. aborting!"
                        % core_id)

                # create new entry in pmd_map for this pmd.
                pmd = Dataif_Pmd(core_id)
                pmd_map[pmd.id] = pmd
                nlog.debug("added pmd %s stats.." % pmd.id)

                # numa id of pmd is that of its core.
                pmd.numa_id = numa_id
        elif line.startswith("main thread"):
            # end of pmd stats
            break
        else:
            # From other lines, we retrieve stats of the pmd.
            (sname, sval) = line.split(":")
            sname = re.sub(r"^\s+", "", sname)
            sval = sval[1:].split()
            if sname == "packets received":
                pmd.rx_cyc[pmd.cyc_idx] = int(sval[0])
            elif sname == "idle cycles":
                pmd.idle_cpu_cyc[pmd.cyc_idx] = int(sval[0])
            elif sname == "processing cycles":
                pmd.proc_cpu_cyc[pmd.cyc_idx] = int(sval[0])

    # new state of pmds.
    new_pmd_l = sorted(pmd_map.keys())

    # skip modelling this object if states differ.
    if len(cur_pmd_l) > 0 and cur_pmd_l != new_pmd_l:
        raise ObjModelExc("pmds count differ")

    return pmd_map

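# Sketch of the per-interval delta math applied to the sampling slots that
# get_pmd_stats() fills: counters are cumulative, so usage in the latest
# interval is the difference between adjacent slots (modulo the ring
# size). All numbers below are made up for illustration.
def _demo_sample_delta():
    ncd_samples_max = 6        # stand-in for config.ncd_samples_max
    proc_cpu_cyc = [100, 250, 430, 0, 0, 0]  # cumulative processing cycles
    cyc_idx = 2                # latest sampling slot
    prev_idx = (cyc_idx - 1) % ncd_samples_max
    cpu_diff = proc_cpu_cyc[cyc_idx] - proc_cpu_cyc[prev_idx]   # 180
    qcpu_pct = 50              # rxq usage reported in percent
    return int((qcpu_pct * cpu_diff) / 100)                     # 90 cycles
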
def rebalance_dryrun_by_cyc(pmd_map):
    """
    Rebalance pmds based on their current traffic load, as a
    dry-run only. In every iteration of this dry run, we keep
    re-assigning rxqs to suitable pmds, while using the actual
    load on each rxq to reflect the estimated pmd load after
    every optimization.

    To re-pin rxqs, the logic used is to order pmds based on
    top consuming rxqs and traverse this list forward and
    backward.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.
    """

    nlog = Context.nlog
    n_rxq_rebalanced = 0

    if len(pmd_map) <= 1:
        nlog.debug("not enough pmds to rebalance ..")
        return -1

    # Calculate current load on every pmd.
    update_pmd_load(pmd_map)

    if not pmd_need_rebalance(pmd_map):
        nlog.debug("no pmd needs rebalance ..")
        return -1

    # Sort pmds in pmd_map based on busier rxqs and then use some
    # constant order that the system provides, to fill up the list.
    pmd_list = []
    rr_cpus = util.rr_cpu_in_numa()
    for cpu in rr_cpus:
        if cpu in pmd_map:
            pmd_list.append(pmd_map[cpu])

    rxq_list = []
    pmd_rxq_n = {}
    numa_pmd_n = {}
    for pmd in pmd_list:
        pmd_rxq_n[pmd.id] = 0
        if pmd.numa_id not in numa_pmd_n:
            numa_pmd_n[pmd.numa_id] = 0
        numa_pmd_n[pmd.numa_id] += 1
        for port in pmd.port_map.values():
            rxq_list += port.rxq_map.values()

    rxq_load_list = sorted(rxq_list,
                           key=lambda o: sum(o.cpu_cyc),
                           reverse=True)

    pmd_list_forward = []
    for rxq in rxq_load_list:
        if rxq.pmd not in pmd_list_forward:
            pmd_list_forward.append(rxq.pmd)

    if (len(pmd_list_forward) < len(pmd_list)):
        for pmd in pmd_list:
            if pmd not in pmd_list_forward:
                pmd_list_forward.append(pmd)

    nlog.debug("cpu numbering based on system info is %s"
               % (",".join(str(x) for x in rr_cpus)))
    nlog.debug("traverse order on pmds based on rxqs is %s"
               % (",".join(str(x.id) for x in pmd_list_forward)))

    pmd_list_reverse = pmd_list_forward[::-1]
    pmd_list = pmd_list_forward
    idx_forward = True

    rpmd = None
    rpmd_gen = (o for o in pmd_list)
    for rxq in rxq_load_list:
        port = rxq.port
        pmd = rxq.pmd

        if len(port.rxq_map) == 0:
            continue

        rpmd = None
        while (not rpmd):
            for rpmd in rpmd_gen:
                # choose pmd from the same numa.
                if (rpmd.numa_id == port.numa_id):
                    # for top consuming rxqs.
                    if (pmd_rxq_n[pmd.id] == 0):
                        if (rpmd.id == pmd.id):
                            pmd_rxq_n[pmd.id] += 1
                            break
                        else:
                            continue

                    # owning pmd has already taken a topper.
                    pmd_rxq_n[rpmd.id] += 1
                    break
            else:
                pmd_rxq_s = sum(map(lambda x: int(x > 0),
                                    pmd_rxq_n.values()))
                if (pmd_rxq_s < numa_pmd_n[port.numa_id]):
                    rpmd_gen = (o for o in pmd_list)
                    rpmd = None
                    continue

                # reverse traverse direction.
                if idx_forward:
                    pmd_list = pmd_list_reverse
                    idx_forward = False
                else:
                    pmd_list = pmd_list_forward
                    idx_forward = True

                rpmd_gen = (o for o in pmd_list)
                rpmd = None

            if rpmd:
                break

        # check while-else for last rpmd.
        if not rpmd:
            raise ObjConsistencyExc(
                "no rebalancing pmd on numa(%d) for port %s rxq %d.."
                % (port.numa_id, port.name, rxq.id))

        assert (rpmd.numa_id == port.numa_id)

        if pmd.id == rpmd.id:
            nlog.info(
                "no change needed for rxq %d (port %s cycles %s) in pmd %d"
                % (rxq.id, port.name, sum(rxq.cpu_cyc), pmd.id))
            continue

        # move this rxq into the rebalancing pmd.
        nlog.info("moving rxq %d (port %s cycles %s) from pmd %d into pmd %d"
                  % (rxq.id, port.name, sum(rxq.cpu_cyc), pmd.id, rpmd.id))
        rport = rpmd.find_port_by_name(port.name)
        if not rport:
            rport = rpmd.add_port(port.name, port.id, port.numa_id)

        rrxq = rport.add_rxq(rxq.id)
        n_rxq_rebalanced += 1
        assert (rport.numa_id == port.numa_id)

        # Copy cpu and rxq cycles of this rxq into its clone
        # in rebalancing pmd (for dry-run).
        rrxq.cpu_cyc = copy.deepcopy(rxq.cpu_cyc)
        rrxq.rx_cyc = copy.deepcopy(rxq.rx_cyc)

        cur_idx = pmd.cyc_idx
        for i in range(0, config.ncd_samples_max - 1):
            for j in range(0, i + 1):
                # wrap the slot index so it stays within the ring buffer.
                idx = (cur_idx + j) % config.ncd_samples_max

                # update rebalancing pmd for cpu cycles and rx count.
                rpmd.proc_cpu_cyc[idx] += rrxq.cpu_cyc[cur_idx]
                rpmd.idle_cpu_cyc[idx] -= rrxq.cpu_cyc[cur_idx]
                rpmd.rx_cyc[idx] += rrxq.rx_cyc[cur_idx]

                # update current pmd for cpu cycles and rx count.
                pmd.proc_cpu_cyc[idx] -= rrxq.cpu_cyc[cur_idx]
                pmd.idle_cpu_cyc[idx] += rrxq.cpu_cyc[cur_idx]
                pmd.rx_cyc[idx] -= rrxq.rx_cyc[cur_idx]

            cur_idx = (cur_idx - 1) % config.ncd_samples_max

        # No more tracking of this rxq in current pmd.
        port.del_rxq(rxq.id)
        port.rxq_rebalanced[rxq.id] = rpmd.id
        rrxq.pmd = pmd

    if n_rxq_rebalanced:
        # Calculate current load on every pmd.
        update_pmd_load(pmd_map)

    return n_rxq_rebalanced

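# Simplified sketch of the forward/backward traversal above: each rxq
# (busiest first) takes the next pmd in the traverse order, and when the
# list is exhausted, the direction reverses so load snakes evenly across
# pmds. This deliberately ignores the numa checks and the pmd_rxq_n
# topper bookkeeping; ids and names are illustrative.
def _demo_zigzag_assignment():
    pmds = [4, 5, 6]                       # pmd ids in traverse order
    rxqs = ["q0", "q1", "q2", "q3", "q4"]  # sorted busiest-first
    gen = iter(pmds)
    pairs = []
    for rxq in rxqs:
        rpmd = next(gen, None)
        if rpmd is None:                   # list exhausted: reverse direction
            pmds = pmds[::-1]
            gen = iter(pmds)
            rpmd = next(gen)
        pairs.append((rxq, rpmd))
    # -> [('q0', 4), ('q1', 5), ('q2', 6), ('q3', 6), ('q4', 5)]
    return pairs
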
def rebalance_dryrun_by_iq(pmd_map):
    """
    Rebalance pmds based on their current traffic load, as a
    dry-run only. In every iteration of this dry run, we keep
    re-assigning rxqs to suitable pmds, while using the actual
    load on each rxq to reflect the estimated pmd load after
    every optimization.

    To re-pin rxqs, the logic used is to move idle (or less
    loaded) rx queues into idle (or less loaded) pmds so that
    a busier rxq is given more processing cycles by a busy pmd.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.
    """

    nlog = Context.nlog
    n_rxq_rebalanced = 0

    if len(pmd_map) <= 1:
        nlog.debug("not enough pmds to rebalance ..")
        return -1

    # Calculate current load on every pmd.
    update_pmd_load(pmd_map)

    if not pmd_need_rebalance(pmd_map):
        nlog.debug("no pmd needs rebalance ..")
        return -1

    # Sort pmds in pmd_map based on the rxq load, in descending order.
    # Pick the pmd which is more loaded from one end of the list.
    pmd_load_list = sorted(pmd_map.values(), key=lambda o: o.pmd_load)

    # Split list into busy and less loaded.
    bpmd_load_list = []
    ipmd_load_list = []
    for pmd in pmd_load_list:
        # pmd load above configured threshold.
        if pmd.pmd_load > config.ncd_pmd_core_threshold:
            bpmd_load_list.append(pmd)

        # skip pmd when its rxq count is one, i.e pmd has just one
        # rxq, and this rxq is already busy (hence, pmd was busy).
        elif (pmd.count_rxq() == 1
                and pmd.pmd_load >= config.ncd_pmd_core_threshold):
            continue

        # rest of the pmds are less loaded (or idle).
        else:
            ipmd_load_list.insert(0, pmd)

    ipmd = None
    ipmd_gen = (o for o in ipmd_load_list)

    for pmd in bpmd_load_list:
        # As busy and idle (or less loaded) pmds are identified,
        # move less loaded rxqs from busy pmd into idle pmd.
        for port in pmd.port_map.values():
            # A port under dry-run may be empty now.
            if len(port.rxq_map) == 0:
                continue

            # As we pick one or more rxqs for every port in this pmd,
            # we leave at least one rxq, so as not to make this busy
            # pmd idle again.
            if pmd.count_rxq() <= 1:
                continue

            if not ipmd or (ipmd.numa_id != port.numa_id):
                for ipmd in ipmd_gen:
                    # Current pmd and rebalancing pmd should be in
                    # the same numa.
                    if (ipmd.numa_id == port.numa_id):
                        break
                else:
                    # exhausted the list without a numa match; reset
                    # for the next port.
                    ipmd = None
                    ipmd_gen = (o for o in ipmd_load_list)

            if not ipmd:
                nlog.debug("no rebalancing pmd on this numa..")
                continue

            # Sort rxqs based on their current load, in ascending order.
            pmd_proc_cyc = sum(pmd.proc_cpu_cyc)
            rxq_load_list = sorted(
                port.rxq_map.values(),
                key=lambda o: ((sum(o.cpu_cyc) * 100) / pmd_proc_cyc))

            # pick the least loaded rxq of this pmd to rebalance.
            try:
                rxq = rxq_load_list.pop(0)
            except IndexError:
                raise ObjConsistencyExc("rxq found empty ..")

            # move this rxq into the rebalancing pmd.
            nlog.info(
                "moving rxq %d (port %s cycles %s) from pmd %d into pmd %d"
                % (rxq.id, port.name, sum(rxq.cpu_cyc), pmd.id, ipmd.id))
            iport = ipmd.find_port_by_name(port.name)
            if not iport:
                iport = ipmd.add_port(port.name, port.id, port.numa_id)

            irxq = iport.add_rxq(rxq.id)
            n_rxq_rebalanced += 1
            assert (iport.numa_id == port.numa_id)

            # Copy cpu cycles of this rxq into its clone
            # in rebalancing pmd (for dry-run).
            irxq.cpu_cyc = copy.deepcopy(rxq.cpu_cyc)
            irxq.rx_cyc = copy.deepcopy(rxq.rx_cyc)

            cur_idx = pmd.cyc_idx
            for i in range(0, config.ncd_samples_max - 1):
                for j in range(0, i + 1):
                    # wrap the slot index so it stays within the
                    # ring buffer.
                    idx = (cur_idx + j) % config.ncd_samples_max

                    # update rebalancing pmd for cpu cycles and rx count.
                    ipmd.proc_cpu_cyc[idx] += irxq.cpu_cyc[cur_idx]
                    ipmd.idle_cpu_cyc[idx] -= irxq.cpu_cyc[cur_idx]
                    ipmd.rx_cyc[idx] += irxq.rx_cyc[cur_idx]

                    # update current pmd for cpu cycles and rx count.
                    pmd.proc_cpu_cyc[idx] -= irxq.cpu_cyc[cur_idx]
                    pmd.idle_cpu_cyc[idx] += irxq.cpu_cyc[cur_idx]
                    pmd.rx_cyc[idx] -= irxq.rx_cyc[cur_idx]

                cur_idx = (cur_idx - 1) % config.ncd_samples_max

            # No more tracking of this rxq in current pmd.
            port.del_rxq(rxq.id)
            port.rxq_rebalanced[rxq.id] = ipmd.id
            irxq.pmd = pmd

            # Recalculate current load on every pmd and check if the
            # rebalancing pmd has got enough work already.
            update_pmd_load(pmd_map)
            if ipmd.pmd_load >= config.ncd_pmd_core_threshold:
                nlog.info("removing pmd %d from idle pmd list" % ipmd.id)
                ipmd_load_list.remove(ipmd)
                ipmd = None

    return n_rxq_rebalanced

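# Simplified sketch of the busy/idle split rebalance_dryrun_by_iq()
# performs before moving rxqs: pmds above the load threshold become
# donors, the rest become receivers. The threshold and load figures are
# made up; the real value comes from config.ncd_pmd_core_threshold.
def _demo_busy_idle_split():
    threshold = 95
    pmd_loads = {4: 99, 5: 96, 6: 40, 7: 5}   # pmd id -> load percent
    busy = [i for i, load in sorted(pmd_loads.items(), key=lambda kv: kv[1])
            if load > threshold]
    idle = [i for i, load in pmd_loads.items() if load <= threshold]
    # busy pmds then shed their least loaded rxq into an idle pmd on the
    # same numa, one at a time, re-estimating load after each move.
    return busy, idle   # -> ([5, 4], [6, 7])
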
def get_pmd_rxqs(pmd_map):
    """
    Collect info on how rxq is pinned with pmd, from the vswitch.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.

    Raises
    ------
    OsCommandExc
        if the given OS command did not succeed for some reason.
    ObjConsistencyExc
        if state of pmds in ncd differ.
    ObjParseExc
        if unable to retrieve info from switch.
    ObjModelExc
        if state of pmds in switch differ.
    """

    nlog = Context.nlog

    # retrieve required data from the vswitch.
    cmd = "ovs-appctl dpif-netdev/pmd-rxq-show"
    data = util.exec_host_command(cmd)
    if not data:
        raise OsCommandExc("unable to collect data")

    # current state of pmds
    cur_pmd_l = sorted(pmd_map.keys())

    # sname and sval store parsed string's key and value.
    sname, sval = None, None

    # current pmd object to be used in every line under parse.
    pmd = None

    for line in data.splitlines():
        if line.startswith('pmd thread'):
            # From the matching line below, we retrieve numa id and
            # core id (aka pmd id).
            linesre = re.search(r'pmd thread numa_id (\d+) core_id (\d+):',
                                line)
            numa_id = int(linesre.groups()[0])
            core_id = int(linesre.groups()[1])
            if core_id not in pmd_map:
                raise ObjConsistencyExc(
                    "trying to add new pmd %d in mid of ncd!.. aborting!"
                    % core_id)
            pmd = pmd_map[core_id]
            assert (pmd.numa_id == numa_id)
            nlog.debug("pmd %d in iteration %d" % (pmd.id, pmd.cyc_idx))

        elif re.match(r'\s.*port: .*', line):
            # From this line, we retrieve cpu usage of rxq.
            linesre = re.search(r'\s.*port:\s([A-Za-z0-9_-]+)\s*'
                                r'queue-id:\s*(\d+)\s*'
                                r'pmd usage:\s*(\d+|NOT AVAIL)\s*?',
                                line)
            pname = linesre.groups()[0]
            qid = int(linesre.groups()[1])
            try:
                qcpu = int(linesre.groups()[2])
            except ValueError:
                qcpu = linesre.groups()[2]
                if (qcpu == 'NOT AVAIL'):
                    raise ObjParseExc("pmd usage unavailable for now")
                else:
                    raise ObjParseExc("error parsing line %s" % line)

            # get the Dataif_Port owning this rxq.
            port = pmd.find_port_by_name(pname)
            if not port:
                port = pmd.add_port(pname)

            # update port attributes now.
            port.id = Context.port_to_id[pname]
            port.numa_id = pmd.numa_id
            port_cls = Context.port_to_cls[pname]
            port_cls.rebalance = True

            # check whether this rxq was being rebalanced.
            if qid in port.rxq_rebalanced:
                # In dry-run, we need to update cpu cycles consumed by
                # this rxq (through current pmd), into the processing
                # cycles of the rebalancing pmd. Then the load of the
                # rebalancing pmd could be estimated appropriately.
                reb_pmd_id = port.rxq_rebalanced[qid]
                reb_pmd = pmd_map[reb_pmd_id]
                reb_port = reb_pmd.find_port_by_name(port.name)
                rxq = reb_port.find_rxq_by_id(qid)

                # qcpu is a percentage in this data, so we convert it
                # into actual cycles using the processing cycles that
                # this pmd consumed.
                # qrx is an approximate count of packets that this rxq
                # received.
                cur_idx = pmd.cyc_idx
                prev_idx = (cur_idx - 1) % config.ncd_samples_max
                rx_diff = pmd.rx_cyc[cur_idx] - pmd.rx_cyc[prev_idx]
                cpu_diff = pmd.proc_cpu_cyc[cur_idx] \
                    - pmd.proc_cpu_cyc[prev_idx]
                qrx = (qcpu * rx_diff) / 100
                qcpu = (qcpu * cpu_diff) / 100

                # update rebalancing pmd for cpu cycles and rx count.
                reb_pmd.proc_cpu_cyc[cur_idx] += qcpu
                reb_pmd.idle_cpu_cyc[cur_idx] -= qcpu
                reb_pmd.rx_cyc[pmd.cyc_idx] += qrx

                # update current pmd for cpu cycles and rx count.
                pmd.proc_cpu_cyc[pmd.cyc_idx] -= qcpu
                pmd.idle_cpu_cyc[pmd.cyc_idx] += qcpu
                pmd.rx_cyc[pmd.cyc_idx] -= qrx
            else:
                # port not in rebalancing state, so update rxq for
                # the cpu cycles consumed by it.
                rxq = (port.find_rxq_by_id(qid) or port.add_rxq(qid))
                rxq.pmd = pmd
                rxq.port = port
                cur_idx = pmd.cyc_idx
                prev_idx = (cur_idx - 1) % config.ncd_samples_max
                rx_diff = pmd.rx_cyc[cur_idx] - pmd.rx_cyc[prev_idx]
                cpu_diff = pmd.proc_cpu_cyc[cur_idx] \
                    - pmd.proc_cpu_cyc[prev_idx]
                # derive qrx from the usage percentage before qcpu is
                # overwritten with the converted cycle count.
                qrx = (qcpu * rx_diff) / 100
                qcpu = (qcpu * cpu_diff) / 100
                rxq.cpu_cyc[pmd.cyc_idx] = qcpu
        else:
            # From any other line, we retrieve the isolated flag.
            (sname, sval) = line.split(":")
            sname = re.sub(r"^\s+", "", sname)
            assert (sname == 'isolated ')
            pmd.isolated = {'true': True, 'false': False}[sval[1:]]

    # new state of pmds.
    new_pmd_l = sorted(pmd_map.keys())

    # skip modelling this object if states differ.
    if len(cur_pmd_l) > 0 and cur_pmd_l != new_pmd_l:
        raise ObjModelExc("pmds count differ")

    return pmd_map

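# Sketch of the dry-run accounting above: cycles that a moved rxq still
# consumes on its old pmd are credited to the rebalancing pmd's
# processing time and debited from its idle time (and vice versa on the
# old pmd), so that estimated loads remain comparable. All numbers are
# illustrative.
def _demo_dryrun_cycle_transfer():
    cur_idx = 0
    old_pmd = {"proc": [800], "idle": [200]}
    reb_pmd = {"proc": [300], "idle": [700]}
    qcpu_pct = 25
    cpu_diff = 400                      # old pmd's processing-cycle delta
    qcpu = (qcpu_pct * cpu_diff) / 100  # 100 cycles consumed by the rxq
    reb_pmd["proc"][cur_idx] += qcpu
    reb_pmd["idle"][cur_idx] -= qcpu
    old_pmd["proc"][cur_idx] -= qcpu
    old_pmd["idle"][cur_idx] += qcpu
    # each pmd's proc+idle total is unchanged; only the split moves.
    return old_pmd, reb_pmd
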
def rebalance_dryrun_by_cyc(pmd_map):
    """
    Rebalance pmds based on their current traffic load, as a
    dry-run only. In every iteration of this dry run, we keep
    re-assigning rxqs to suitable pmds, while using the actual
    load on each rxq to reflect the estimated pmd load after
    every optimization.

    To re-pin rxqs, the logic used is to order pmds based on
    top consuming rxqs and traverse this list forward and
    backward.

    Parameters
    ----------
    pmd_map : dict
        mapping of pmd id and its Dataif_Pmd object.
    """

    nlog = Context.nlog
    n_rxq_rebalanced = 0

    if len(pmd_map) <= 1:
        nlog.debug("not enough pmds to rebalance ..")
        return n_rxq_rebalanced

    # Calculate current load on every pmd.
    update_pmd_load(pmd_map)

    if not pmd_need_rebalance(pmd_map):
        nlog.debug("no pmd needs rebalance ..")
        return n_rxq_rebalanced

    # Sort pmds in pmd_map based on busier rxqs and then use some
    # constant order that the system provides, to fill up the list.
    pmd_list = []
    rr_cpus = util.rr_cpu_in_numa()
    for cpu in rr_cpus:
        if cpu in pmd_map:
            pmd_list.append(pmd_map[cpu])

    rxq_list = []
    pmd_rxq_n = {}
    for pmd in pmd_list:
        pmd_rxq_n[pmd.id] = 0
        for port in pmd.port_map.values():
            rxq_list += port.rxq_map.values()

    rxq_load_list = sorted(
        rxq_list, key=lambda o: sum(o.cpu_cyc), reverse=True)

    pmd_list_forward = []
    for rxq in rxq_load_list:
        if rxq.pmd not in pmd_list_forward:
            pmd_list_forward.append(rxq.pmd)

    if (len(pmd_list_forward) < len(pmd_list)):
        for pmd in pmd_list:
            if pmd not in pmd_list_forward:
                pmd_list_forward.append(pmd)

    nlog.debug("cpu numbering based on system info is %s"
               % (",".join(str(x) for x in rr_cpus)))
    nlog.debug("traverse order on pmds based on rxqs is %s"
               % (",".join(str(x.id) for x in pmd_list_forward)))

    pmd_list_reverse = pmd_list_forward[::-1]
    pmd_list = pmd_list_forward
    idx_forward = True

    rpmd = None
    rpmd_gen = (o for o in pmd_list)
    for rxq in rxq_load_list:
        port = rxq.port
        pmd = rxq.pmd

        if len(port.rxq_map) == 0:
            continue

        rpmd = None
        while (not rpmd):
            for rpmd in rpmd_gen:
                # choose pmd from the same numa.
                if (rpmd.numa_id == port.numa_id):
                    # for top consuming rxqs.
                    if (pmd_rxq_n[pmd.id] == 0):
                        if (rpmd.id == pmd.id):
                            pmd_rxq_n[pmd.id] += 1
                            break
                        else:
                            continue

                    # owning pmd has already taken a topper.
                    pmd_rxq_n[rpmd.id] += 1
                    break
            else:
                pmd_n = len(pmd_list)
                pmd_rxq_s = sum(pmd_rxq_n.values())
                # guard n_rxq_rebalanced to avoid a modulo-by-zero
                # before any rxq has been rebalanced.
                if (n_rxq_rebalanced
                        and (pmd_n % n_rxq_rebalanced) != 0
                        and pmd_rxq_s < pmd_n):
                    rpmd_gen = (o for o in pmd_list)
                    rpmd = None
                    continue

                # reverse traverse direction.
                if idx_forward:
                    pmd_list = pmd_list_reverse
                    idx_forward = False
                else:
                    pmd_list = pmd_list_forward
                    idx_forward = True

                rpmd_gen = (o for o in pmd_list)
                rpmd = None

        # check while-else for last rpmd.
        if not rpmd:
            raise ObjConsistencyExc(
                "no rebalancing pmd on numa(%d) for port %s rxq %d.."
                % (port.numa_id, port.name, rxq.id))

        assert (rpmd.numa_id == port.numa_id)

        if pmd.id == rpmd.id:
            nlog.info("no change needed for rxq %d (port %s) in pmd %d"
                      % (rxq.id, port.name, pmd.id))
            n_rxq_rebalanced += 1
            continue

        # move this rxq into the rebalancing pmd.
        nlog.info("moving rxq %d (port %s) from pmd %d into pmd %d .."
                  % (rxq.id, port.name, pmd.id, rpmd.id))
        rport = rpmd.find_port_by_name(port.name)
        if not rport:
            rport = rpmd.add_port(port.name, port.id, port.numa_id)

        rrxq = rport.add_rxq(rxq.id)
        n_rxq_rebalanced += 1
        assert (rport.numa_id == port.numa_id)

        # Copy cpu cycles of this rxq into its clone
        # in rebalancing pmd (for dry-run).
        rrxq.cpu_cyc = copy.deepcopy(rxq.cpu_cyc)

        # No more tracking of this rxq in current pmd.
        port.del_rxq(rxq.id)

        # Until the dry-run and the rebalance are completed, this rxq
        # should know its current pmd, even while it sits with the
        # rebalancing pmd. Only then can we derive the cpu usage of
        # this rxq from its current pmd (as we scan data in each
        # sampling interval).
        opmd = rxq.pmd
        oport = opmd.find_port_by_name(port.name)
        oport.rxq_rebalanced[rxq.id] = rpmd.id
        rrxq.pmd = opmd

    return n_rxq_rebalanced

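# Hypothetical end-to-end flow for one sampling interval, tying together
# the helpers in this module: sample cumulative pmd counters, map rxqs
# and their usage, then dry-run a rebalance. Error handling and the
# sleep between samples are elided; this is a sketch, not the daemon's
# actual main loop.
def _demo_sampling_iteration(pmd_map):
    get_pmd_stats(pmd_map)    # refresh cumulative pmd counters
    get_pmd_rxqs(pmd_map)     # map rxqs and their cpu usage onto pmds
    n_moves = rebalance_dryrun_by_cyc(pmd_map)
    if n_moves > 0:
        Context.nlog.info("dry-run suggests %d rxq move(s)" % n_moves)
    return n_moves
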