def main():
    args = _syntax().parse_args()

    # Initialize stats client
    stats_ep = args.stats_ep if args.stats_ep else "ipc:///var/run/virtio-forwarder/stats"
    stats_ctx = zmq.Context()
    stats_sock = stats_ctx.socket(zmq.REQ)
    stats_sock.setsockopt(zmq.LINGER, 0)
    stats_sock.setsockopt(zmq.SNDTIMEO, 0)
    stats_sock.setsockopt(zmq.RCVTIMEO, 2000)
    stats_sock.connect(stats_ep)

    # Initialize core pinning client
    sched_ep = args.sched_ep if args.sched_ep else "ipc:///var/run/virtio-forwarder/core_sched"
    sched_ctx = zmq.Context()
    sched_sock = sched_ctx.socket(zmq.REQ)
    sched_sock.setsockopt(zmq.LINGER, 0)
    sched_sock.setsockopt(zmq.SNDTIMEO, 0)
    sched_sock.setsockopt(zmq.RCVTIMEO, 2000)
    sched_sock.connect(sched_ep)

    # Get relay stats
    stats_request = relay_pb2.StatsRequest()
    stats_request.include_inactive = False
    stats_sock.send(stats_request.SerializePartialToString())
    stats_response = relay_pb2.StatsResponse()
    stats_response.ParseFromString(stats_sock.recv())
    assert stats_response.IsInitialized()

    # Get worker core mapping
    msg = relay_pb2.CoreSchedRequest(op=relay_pb2.CoreSchedRequest.GET_EAL_CORES)
    sched_sock.send(msg.SerializePartialToString())
    worker_cores = relay_pb2.CoreSchedResponse()
    worker_cores.ParseFromString(sched_sock.recv())
    worker_cores = worker_cores.eal_cores

    if args.print_core_map:
        print("Worker core affinities:")
        print_core_mapping(stats_response, worker_cores)
        sys.exit(0)

    # Parse the requested relay-to-CPU mappings
    relay_mappings = []
    for mapping in args.virtio_cpu:
        try:
            n_relay = int(mapping.split(':')[0])
            virtio2vf = int(mapping.split(':')[1].split(',')[0])
            vf2virtio = int(mapping.split(':')[1].split(',')[1])
        except (ValueError, IndexError):
            print("Invalid virtio-cpu command line format. Usage: virtio-cpu=n:c1,c2")
            sys.exit(1)
        print("Setting relay %d=%d,%d" % (n_relay, virtio2vf, vf2virtio))
        req = relay_pb2.CoreSchedRequest.RelayCPU()
        req.relay_number = n_relay
        req.virtio2vf_cpu = virtio2vf
        req.vf2virtio_cpu = vf2virtio
        relay_mappings.append(req)

    if relay_mappings:
        # Trigger cpu migration
        sched_req = relay_pb2.CoreSchedRequest(
            op=relay_pb2.CoreSchedRequest.UPDATE,
            relay_cpu_map=relay_mappings)
        sched_sock.send(sched_req.SerializePartialToString())
        # Gather response
        sched_response = relay_pb2.CoreSchedResponse()
        sched_response.ParseFromString(sched_sock.recv())
        if sched_response.status == relay_pb2.CoreSchedResponse.OK:
            print("Scheduler response: OK")
        else:
            print("Scheduler response: ERROR")
        # Print new mapping
        stats_sock.send(stats_request.SerializePartialToString())
        stats_response = relay_pb2.StatsResponse()
        stats_response.ParseFromString(stats_sock.recv())
        print("New worker core mapping:")
        print_core_mapping(stats_response, worker_cores)
    else:
        print("Worker core affinities:")
        print_core_mapping(stats_response, worker_cores)
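
# A minimal sketch of the print_core_mapping() helper called above and in the
# scheduler daemon below. The real implementation ships with virtio-forwarder;
# the exact output layout here is an assumption, inferred from how the helper
# is invoked (with an optional syslog level as third argument). It groups the
# per-relay worker threads by the CPU they are pinned to. Assumes `syslog` is
# imported at module level.
def print_core_mapping(stats_response, worker_cores, log_level=None):
    for cpu in sorted(worker_cores):
        threads = []
        for relay in stats_response.relay:
            if relay.cpu.vm_to_vf == cpu:
                threads.append("relay %d (virtio2vf)" % relay.id)
            if relay.cpu.vf_to_vm == cpu:
                threads.append("relay %d (vf2virtio)" % relay.id)
        line = "CPU %d: %s" % (cpu, ", ".join(threads) or "idle")
        if log_level is not None:
            syslog.syslog(log_level, line)
        else:
            print(line)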
def main():
    args = _syntax().parse_args()
    poll_interval = 2.
    if args.poll_interval:
        poll_interval = args.poll_interval

    # Initialize stats client
    stats_ep = args.stats_ep if args.stats_ep else "ipc:///var/run/virtio-forwarder/stats"
    stats_sock = open_socket(stats_ep)

    # Send an initial request. It is not strictly necessary, but it
    # establishes a reference time for the VIO4WD rate stats.
    stats_request = relay_pb2.StatsRequest()
    stats_request.include_inactive = False
    stats_request.delay = 200
    stats_response = relay_pb2.StatsResponse()
    stats_sock = reconnect_send_recv(stats_sock, 'stats', stats_request,
                                     stats_response, stats_ep,
                                     poll_interval)[1]

    # Initialize core scheduler client
    sched_ep = args.sched_ep if args.sched_ep else "ipc:///var/run/virtio-forwarder/core_sched"
    sched_sock = open_socket(sched_ep)

    # Get worker core mapping
    sched_request = relay_pb2.CoreSchedRequest(
        op=relay_pb2.CoreSchedRequest.GET_EAL_CORES)
    sched_response = relay_pb2.CoreSchedResponse()
    sched_sock = reconnect_send_recv(sched_sock, 'core scheduler',
                                     sched_request, sched_response, sched_ep,
                                     poll_interval)[1]
    worker_cores = sched_response.eal_cores

    max_relay = RelayRateExt(MAX_PPS, MAX_MBPS / 8 * 1e6)
    max_load = 1 * (max_relay.estimate_vm2vf_load() +
                    max_relay.estimate_vf2vm_load())
    max_chars = 60
    try:
        # [ Main processing loop
        while True:
            # Get stats
            relays = []
            stats_response = relay_pb2.StatsResponse()
            err, stats_sock = reconnect_send_recv(stats_sock, 'stats',
                                                  stats_request,
                                                  stats_response, stats_ep,
                                                  poll_interval)
            relays = stats_response.relay
            # Gather worker cores again if the server went down.
            if err:
                sched_response = relay_pb2.CoreSchedResponse()
                sched_sock = reconnect_send_recv(sched_sock, 'core scheduler',
                                                 sched_request, sched_response,
                                                 sched_ep, poll_interval)[1]
                worker_cores = sched_response.eal_cores
            if len(relays) == 0:
                time.sleep(poll_interval)
                continue

            # There are running relays
            # Calculate worker loads
            worker_loads = {i: 0. for i in worker_cores}
            for relay in relays:
                rate = RelayRate(relay)
                worker_loads[relay.cpu.vm_to_vf] += rate.estimate_vm2vf_load()
                worker_loads[relay.cpu.vf_to_vm] += rate.estimate_vf2vm_load()
            os.system('clear')
            print("CPU loads")
            for cpu, load in worker_loads.items():
                try:
                    bars = int(round(load / max_load * max_chars))
                except ZeroDivisionError:
                    bars = 0
                print("CPU{}:".format(cpu), "-" * bars)
            time.sleep(poll_interval)
        # ] End while True
    except KeyboardInterrupt:
        pass
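
# Minimal sketches of the open_socket() and reconnect_send_recv() helpers
# that the monitor above and the daemon below rely on. The real versions live
# elsewhere in the virtio-forwarder tree; the retry policy shown here (tear
# the REQ socket down on zmq.Again and retry indefinitely) is an assumption
# inferred from how the (err, sock) return value is consumed. Assumes `zmq`
# and `time` are imported at module level.
def open_socket(ep):
    # REQ socket with short timeouts so a dead server never blocks a client.
    sock = zmq.Context.instance().socket(zmq.REQ)
    sock.setsockopt(zmq.LINGER, 0)
    sock.setsockopt(zmq.SNDTIMEO, 0)
    sock.setsockopt(zmq.RCVTIMEO, 2000)
    sock.connect(ep)
    return sock

def reconnect_send_recv(sock, name, request, response, ep, retry_interval):
    # Send `request` and parse the reply into `response`, reconnecting until
    # the exchange succeeds. Returns (err, sock): err is True if the `name`
    # server was unreachable at least once, sock is the (possibly re-opened)
    # socket. A REQ socket cannot be reused after a failed send/recv cycle,
    # hence the close-and-reopen on every timeout.
    err = False
    while True:
        try:
            sock.send(request.SerializePartialToString())
            response.ParseFromString(sock.recv())
            return err, sock
        except zmq.Again:
            err = True
            sock.close()
            time.sleep(retry_interval)
            sock = open_socket(ep)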
def main():
    global must_run
    args = _syntax().parse_args()

    # Install signal handlers
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGHUP, sig_handler)

    # Initiate logging
    syslog.openlog(ident='vio4wd_core_scheduler', logoption=syslog.LOG_PID,
                   facility=syslog.LOG_USER)
    loglevel = args.loglevel if args.loglevel else 7
    if loglevel > 7 or loglevel < 0:
        loglevel = 7
        syslog.syslog(syslog.LOG_WARNING,
                      "Specified invalid loglevel. Defaulting to %d" % loglevel)
    syslog.setlogmask(syslog.LOG_UPTO(loglevel))
    syslog.syslog(syslog.LOG_NOTICE, "Dynamic load balancing initiated...")

    if args.sensitivity < 0:
        args.sensitivity = 0.25
        syslog.syslog(syslog.LOG_WARNING,
                      "Specified invalid sensitivity. Defaulting to %f"
                      % args.sensitivity)

    poll_interval = 5.
    if args.poll_interval:
        poll_interval = args.poll_interval
    syslog.syslog(syslog.LOG_INFO,
                  "Polling every %.2f seconds." % poll_interval)

    # Initialize stats client
    stats_ep = args.stats_ep if args.stats_ep else "ipc:///var/run/virtio-forwarder/stats"
    syslog.syslog(syslog.LOG_INFO,
                  "Connecting to stats server on %s..." % stats_ep)
    stats_sock = open_socket(stats_ep)

    # Get stats for initial worker core mapping
    stats_request = relay_pb2.StatsRequest()
    stats_request.include_inactive = False
    stats_request.delay = 200
    stats_response = relay_pb2.StatsResponse()
    stats_sock = reconnect_send_recv(stats_sock, 'stats', stats_request,
                                     stats_response, stats_ep,
                                     poll_interval)[1]

    # Initialize core scheduler client
    sched_ep = args.sched_ep if args.sched_ep else "ipc:///var/run/virtio-forwarder/core_sched"
    syslog.syslog(syslog.LOG_INFO,
                  "Connecting to core_scheduler server on %s..." % sched_ep)
    sched_sock = open_socket(sched_ep)

    # Get worker core mapping
    sched_request = relay_pb2.CoreSchedRequest(
        op=relay_pb2.CoreSchedRequest.GET_EAL_CORES)
    sched_response = relay_pb2.CoreSchedResponse()
    sched_sock = reconnect_send_recv(sched_sock, 'core scheduler',
                                     sched_request, sched_response, sched_ep,
                                     poll_interval)[1]
    worker_cores = sched_response.eal_cores
    syslog.syslog(syslog.LOG_DEBUG, "Worker cores at startup:")
    print_core_mapping(stats_response, worker_cores, syslog.LOG_DEBUG)

    # Gather NUMA information: only keep nodes that host worker cores
    node_cpus = {}
    for i in range(get_max_node() + 1):
        cpus = get_node_cpus(i)
        workers_on_numa = cpus & set(worker_cores)
        if len(workers_on_numa) > 0:
            node_cpus[i] = cpus.copy()

    # [ Main processing loop
    while must_run:
        # Get stats
        relays = []
        stats_response = relay_pb2.StatsResponse()
        err, stats_sock = reconnect_send_recv(stats_sock, 'stats',
                                              stats_request, stats_response,
                                              stats_ep, poll_interval)
        relays = stats_response.relay

        # Gather worker cores again if the server went down.
        if err:
            sched_response = relay_pb2.CoreSchedResponse()
            sched_sock = reconnect_send_recv(sched_sock, 'core scheduler',
                                             sched_request, sched_response,
                                             sched_ep, poll_interval)[1]
            worker_cores = sched_response.eal_cores
            syslog.syslog(syslog.LOG_DEBUG, "Worker cores upon reconnection:")
            print_core_mapping(stats_response, worker_cores, syslog.LOG_DEBUG)

        if len(relays) == 0:
            time.sleep(poll_interval)
            continue

        # There are running relays
        # Initialize rate and current mapping structures per NUMA node
        mapping_changed = False
        numa_info = {}
        for node, cpus in node_cpus.items():
            numa_info[node] = (list(cpus & set(worker_cores)), {}, [])
        for relay in relays:
            # Rates
            rate = RelayRate(relay)
            rate.estimate_vm2vf_load()
            rate.estimate_vf2vm_load()
            try:
                numa_info[relay.socket_id][1][relay.id] = rate
            except KeyError:
                syslog.syslog(
                    syslog.LOG_WARNING,
                    "%d is not a valid socket id! Relay %d will not form "
                    "part of the optimization." % (relay.socket_id, relay.id))
                continue
            # Current mapping
            req = relay_pb2.CoreSchedRequest.RelayCPU()
            req.relay_number = relay.id
            req.virtio2vf_cpu = relay.cpu.vm_to_vf
            req.vf2virtio_cpu = relay.cpu.vf_to_vm
            numa_info[relay.socket_id][2].append(req)

        # Merge NUMA infos if global optimization was requested
        if args.global_numa_opt:
            e0 = list(set(worker_cores))
            e1 = {}
            e2 = []
            for numa_node, info in numa_info.items():
                e1.update(info[1])
                e2 = e2 + info[2]
            numa_info = {-1: (e0, e1, e2)}

        # [ [NUMA-local] optimization
        for numa_node, info in numa_info.items():
            workers = info[0]           # list of worker cores on this node
            relay_rates = info[1]       # dict of relay rate objects
            current_mappings = info[2]  # list of CoreSchedRequest.RelayCPU mappings
            if len(relay_rates) == 0:
                continue

            # Distribute loads.
            # Use a simple round-robin algorithm: optimal assignment would
            # entail integer programming, which may be intractable when there
            # are many workers and/or relays.
            new_mappings, fval = round_robin(relay_rates, workers)

            # Check whether a reshuffle is warranted. The coefficient of
            # variation is used since it is unitless. If the new mapping
            # results in loads 'tighter' by some margin, apply it; else do
            # nothing. The margin may require tuning.
            cv = fval  # coefficient of variation
            prev_cv = get_variation_coefficient(current_mappings, relay_rates,
                                                workers)
            if cv == -1:
                # No loads are running. Do nothing.
                pass
            elif (cv + args.sensitivity) < prev_cv:
                syslog.syslog(syslog.LOG_INFO,
                              "Migrating workers on NUMA %d..." % numa_node)
                # Trigger cpu migration
                sched_req = relay_pb2.CoreSchedRequest(
                    op=relay_pb2.CoreSchedRequest.UPDATE,
                    relay_cpu_map=new_mappings)
                sched_sock.send(sched_req.SerializePartialToString())
                # Gather response. Do not attempt infinite reconnects here.
                sched_response = relay_pb2.CoreSchedResponse()
                try:
                    sched_response.ParseFromString(sched_sock.recv())
                    if sched_response.status == relay_pb2.CoreSchedResponse.OK:
                        syslog.syslog(syslog.LOG_INFO,
                                      "Scheduler response: OK")
                        mapping_changed = True
                    else:
                        syslog.syslog(syslog.LOG_ERR,
                                      "Scheduler response: ERROR")
                except zmq.Again:
                    syslog.syslog(
                        syslog.LOG_ERR,
                        "Connection to server lost. "
                        "Could not migrate workers on NUMA %d."
                        % numa_node)
            else:
                syslog.syslog(syslog.LOG_INFO,
                              "Worker cores still sufficiently balanced.")
        # ] End [NUMA-local] optimization

        if mapping_changed:
            # Print new mapping
            stats_sock.send(stats_request.SerializePartialToString())
            stats_response = relay_pb2.StatsResponse()
            stats_response.ParseFromString(stats_sock.recv())
            syslog.syslog(syslog.LOG_DEBUG, "New worker core mapping:")
            print_core_mapping(stats_response, worker_cores, syslog.LOG_DEBUG)

        time.sleep(poll_interval)
    # ] End main processing loop

    syslog.syslog(syslog.LOG_NOTICE, "Stopping vio4wd_core_scheduler")
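
# Hedged sketches of the balancing and NUMA helpers used in the loop above.
# The shipped implementations may differ: the greedy assignment, the -1
# zero-load sentinel, and the RelayRate attribute names (vm2vf_load /
# vf2vm_load, assumed to be cached by the estimate_*() calls) are all
# inferred from how the return values are consumed. Assumes `math`, `os`,
# and `glob` are imported at module level.
def get_variation_coefficient(mappings, relay_rates, workers):
    # Coefficient of variation (stddev / mean) of the per-worker load implied
    # by a list of CoreSchedRequest.RelayCPU mappings. Unitless, so values are
    # comparable across NUMA nodes. Returns -1 when no load is running.
    loads = {w: 0. for w in workers}
    for m in mappings:
        rate = relay_rates[m.relay_number]
        loads[m.virtio2vf_cpu] += rate.vm2vf_load
        loads[m.vf2virtio_cpu] += rate.vf2vm_load
    mean = sum(loads.values()) / len(loads)
    if mean == 0:
        return -1
    var = sum((x - mean) ** 2 for x in loads.values()) / len(loads)
    return math.sqrt(var) / mean

def round_robin(relay_rates, workers):
    # Greedy round robin: sort the per-direction relay loads in descending
    # order, then repeatedly hand the largest remaining load to the currently
    # least-loaded worker. Returns (mappings, cv) where cv is the coefficient
    # of variation of the resulting load distribution.
    loads = {w: 0. for w in workers}
    mappings = {}
    half_loads = []
    for relay_id, rate in relay_rates.items():
        half_loads.append((rate.vm2vf_load, relay_id, 'virtio2vf_cpu'))
        half_loads.append((rate.vf2vm_load, relay_id, 'vf2virtio_cpu'))
    for load, relay_id, direction in sorted(half_loads, reverse=True):
        cpu = min(loads, key=loads.get)
        loads[cpu] += load
        req = mappings.setdefault(
            relay_id,
            relay_pb2.CoreSchedRequest.RelayCPU(relay_number=relay_id))
        setattr(req, direction, cpu)
    new_mappings = list(mappings.values())
    return new_mappings, get_variation_coefficient(new_mappings, relay_rates,
                                                   workers)

def get_max_node():
    # Highest NUMA node id present on the system, read from sysfs. The real
    # helpers may instead use libnuma; this is an illustrative stand-in.
    nodes = glob.glob('/sys/devices/system/node/node[0-9]*')
    return max(int(os.path.basename(n)[4:]) for n in nodes)

def get_node_cpus(node):
    # Set of CPU ids belonging to a NUMA node, read from sysfs.
    cpus = set()
    for path in glob.glob('/sys/devices/system/node/node%d/cpu[0-9]*' % node):
        cpus.add(int(os.path.basename(path)[3:]))
    return cpus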