def __init__(self, host=HOST_DEFAULT, port=PORT_DEFAULT):
    """Record the endpoint, reset the stop flag, and set up logger and DB client.

    host: value stored as ``self.host``.
    port: value stored as ``self.port``.
    """
    self.host, self.port = host, port
    ## 0 means "keep running"
    self.monitor_thread_done = 0
    self._setup_logger()
    db_client = ActiCloudDBClient()
    self.actidb_client = db_client
def __init__(self):
    """Create the logger, the fixed compute node list and both clients."""
    logger_name = self.__class__.__name__
    self.logger = logging.getLogger(logger_name)

    ## Compute nodes are hard-coded
    self.COMPUTE_NODES = list((
        'acticloud1',
        'acticloud2',
        'acticloud3',
        'acticloud4',
    ))

    db_client = ActiCloudDBClient()
    self.acticlouddb_client = db_client
    cloud_client = OpenstackClient()
    self.openstack_client = cloud_client
class ActicloudNumGoldSilverFilter(filters.BaseHostFilter):
    """ACTiCLOUD filter. Part of ACTiManager external.

    A host passes when the requested instance fits in its vCPU budget.
    Gold vCPUs are counted one-to-one; silver vCPUs are divided by
    ACTICLOUD_OVERSUBSCRIPTION (rounded up) before being counted.
    """

    # list of hosts doesn't change within a request
    run_filter_once_per_request = True

    # Flavor ids that mark an instance as a gold VM.
    GOLD_FLAVOR_IDS = ("1001", "1002", "1004", "1008")

    def __init__(self):
        LOG.info("Hello from ACTICLOUD Filter __init__")
        self.acticlouddb_client = ActiCloudDBClient()
        self.nova = novaclient.Client("2", **get_nova_creds())

    def host_passes(self, host_state, spec_obj):
        """Return True if the instance described by spec_obj fits on the host.

        host_state: supplies ``host`` (hostname) and ``vcpus_total``.
        spec_obj: supplies ``flavor.flavorid`` and ``vcpus``.
        """
        hostname = host_state.host
        flavor_id = spec_obj.flavor.flavorid

        nr_gold_vcpus = self.acticlouddb_client.get_nr_gold_vcpus_by_hostname(
            hostname)
        nr_silver_vcpus = self.acticlouddb_client.get_nr_silver_vcpus_by_hostname(
            hostname)
        is_gold_vm = flavor_id in self.GOLD_FLAVOR_IDS

        nr_vcpus_host = host_state.vcpus_total
        nr_vcpus_instance = spec_obj.vcpus

        ## How many vcpus do silver VMs occupy when oversubscribed?
        nr_silver_vcpus_ovs = int(
            math.ceil(float(nr_silver_vcpus) / ACTICLOUD_OVERSUBSCRIPTION))
        free_vcpus = nr_vcpus_host - (nr_gold_vcpus + nr_silver_vcpus_ovs)

        if is_gold_vm:
            ret = free_vcpus >= nr_vcpus_instance
        else:
            ## float() is required: with two ints Python 2 would truncate the
            ## quotient before ceil() and under-estimate the needed vcpus.
            needed_vcpus = math.ceil(
                float(nr_vcpus_instance) / ACTICLOUD_OVERSUBSCRIPTION)
            if free_vcpus >= needed_vcpus:
                ret = True
            else:
                ## Room left in the physical cpus already used by silver VMs
                free_oversubscribed_vcpus = int(
                    nr_silver_vcpus_ovs *
                    ACTICLOUD_OVERSUBSCRIPTION) - nr_silver_vcpus
                ret = free_oversubscribed_vcpus >= nr_vcpus_instance

        if ret:
            LOG.info("DOES FIT IN HOST %s", hostname)
        else:
            LOG.info("DOES NOT FIT IN HOST %s", hostname)
        return ret
def __init__(self, hostname):
    """Remember the hostname and create the clients and empty bookkeeping.

    hostname: value stored as ``self.hostname``.
    """
    self.hostname = hostname
    self.logger = logging.getLogger(self.__class__.__name__)

    ## acticloudDB client first, then the Openstack client
    db_client = ActiCloudDBClient()
    self.acticloudDB_client = db_client
    cloud_client = OpenstackClient()
    self.openstack_client = cloud_client

    ## Monitored VMs and the perf threads spawned for them
    self.monitored_vms = {}
    self.perf_threads = list()
class InformationAggregator():
    """Facade over the ACTiCLOUD database client and the OpenStack client."""

    def __init__(self):
        """Create the logger, the fixed compute node list and both clients."""
        logger_name = self.__class__.__name__
        self.logger = logging.getLogger(logger_name)
        self.COMPUTE_NODES = list((
            'acticloud1',
            'acticloud2',
            'acticloud3',
            'acticloud4',
        ))
        self.acticlouddb_client = ActiCloudDBClient()
        self.openstack_client = OpenstackClient()

    def is_gold_vm(self, vmid):
        """Forward to the DB client's ``is_gold_vm(vmid, 1)``."""
        db = self.acticlouddb_client
        return db.is_gold_vm(vmid, 1)

    def get_vm_nr_vcpus(self, vmid):
        """Forward to the DB client's ``get_nr_vcpus(vmid)``."""
        db = self.acticlouddb_client
        return db.get_nr_vcpus(vmid)

    def get_vm_current_host(self, vmid):
        """Forward to the OpenStack client's ``get_vm_current_host(vmid)``."""
        cloud = self.openstack_client
        return cloud.get_vm_current_host(vmid)

    def getServerListByComputeNode(self, computeNode):
        """Forward to the OpenStack client's ``get_vms_by_hostname``."""
        cloud = self.openstack_client
        return cloud.get_vms_by_hostname(computeNode)

    def getGoldServerListByComputeNode(self, computeNode):
        """Return the servers of computeNode for which is_gold_vm is truthy."""
        gold = []
        for server in self.getServerListByComputeNode(computeNode):
            if self.is_gold_vm(server):
                gold.append(server)
        return gold

    def getSilverServerListByComputeNode(self, computeNode):
        """Return the servers of computeNode for which is_gold_vm is falsy."""
        silver = []
        for server in self.getServerListByComputeNode(computeNode):
            if self.is_gold_vm(server):
                continue
            silver.append(server)
        return silver

    def getVcpusByComputeNode(self, computeNode):
        """Return the (gold_vcpus, silver_vcpus) pair the DB reports."""
        db = self.acticlouddb_client
        nr_gold = db.get_nr_gold_vcpus_by_hostname(computeNode)
        nr_silver = db.get_nr_silver_vcpus_by_hostname(computeNode)
        return (nr_gold, nr_silver)

    def getServerList(self):
        """Forward to the OpenStack client's ``get_vms()``."""
        cloud = self.openstack_client
        return cloud.get_vms()

    def getComputeNodes(self):
        """Return a new list holding the configured compute node names."""
        return [node for node in self.COMPUTE_NODES]
def __init__(self, compute_node_name, system_type):
    """Pick the system implementation for system_type and create the clients.

    compute_node_name: stored as both ``hostname`` and ``compute_node_name``.
    system_type: one of "actistatic", "actifull", "gps" or "gno"; any other
        value is logged as an error and the process exits with status 1.
    """
    global INTERFERENCE_DETECTION_ENABLED

    self.logger = logging.getLogger(self.__class__.__name__)
    self.hostname = self.compute_node_name = compute_node_name
    self.system_type = system_type

    if system_type in ("actistatic", "actifull"):
        ## Only the "actifull" flavour switches interference detection on
        if system_type == "actifull":
            INTERFERENCE_DETECTION_ENABLED = True
        system_cls = ActiManagerSystem
    elif system_type == "gps":
        system_cls = GoldPerSocketSystem
    elif system_type == "gno":
        system_cls = GoldNotOversubscribedSystem
    else:
        self.logger.error("Wrong system_type given: %s", system_type)
        sys.exit(1)

    ## Every system variant is built with the same arguments
    self.system = system_cls([2, 1, 10], compute_node_name)

    self.acticlouddb_client = ActiCloudDBClient()
    self.openstack_client = OpenstackClient()
class ActicloudGoldWeigher(weights.BaseHostWeigher):
    """Weigher that returns the negated number of free vcpus of a host."""

    def __init__(self):
        LOG.info("Hello from Gold __init__")
        db_client = ActiCloudDBClient()
        self.acticlouddb_client = db_client

    def _weigh_object(self, host_state, weight_properties):
        """Higher weights win."""
        node = host_state.nodename
        db = self.acticlouddb_client

        gold_vcpus = db.get_nr_gold_vcpus_by_hostname(node)
        silver_vcpus = db.get_nr_silver_vcpus_by_hostname(node)

        ## Silver vcpus are counted after dividing by the oversubscription
        silver_share = float(silver_vcpus) / ACTICLOUD_OVERSUBSCRIPTION
        silver_vcpus_ovs = int(math.ceil(silver_share))

        host_vcpus = host_state.vcpus_total
        free = host_vcpus - (gold_vcpus + silver_vcpus_ovs)

        log_fields = {'host': node, 'free_vcpus': free}
        LOG.info(
            "Executing acticloud gold weigher for host %(host)s %(free_vcpus)d",
            log_fields)

        return float(-free)
class ActicloudWeigher(weights.BaseHostWeigher):
    """Weigher that scores a host by the profit change of adding the new VM."""

    def __init__(self):
        LOG.info("Hello from ActicloudWeigher __init__")
        self.acticlouddb_client = ActiCloudDBClient()
        nova_creds = get_nova_creds()
        self.nova = novaclient.Client("2", **nova_creds)

    def _weigh_object(self, host_state, weight_properties):
        """Higher weights win."""
        ## Current state of the host, as recorded in the acticloudDB
        hostname = host_state.host
        nr_vcpus_host = host_state.vcpus_total
        host_vms = list(self.acticlouddb_client.get_vms_by_hostname(hostname))

        LOG.info("============ ActicloudWeigher %s ===================>",
                 hostname)

        ## Profit of the host as it is now
        profit_before = calculate_host_profit(host_vms, nr_vcpus_host)

        ## Description of the VM being scheduled, taken from openstack
        new_vm_uuid = weight_properties.instance_uuid
        new_vm = self.nova.servers.get(new_vm_uuid)
        new_vm_vcpus = self.nova.flavors.get(new_vm.flavor['id']).vcpus
        gold_flag = new_vm.metadata.get('is_gold') or 0
        noisy_flag = new_vm.metadata.get('is_noisy') or 0
        sensitive_flag = new_vm.metadata.get('is_sensitive') or 0

        candidate = {
            'id': new_vm_uuid,
            'hostname': hostname,
            'nr_vcpus': new_vm_vcpus,
            'is_gold': gold_flag,
            'is_noisy': noisy_flag,
            'is_sensitive': sensitive_flag
        }
        host_vms.append(candidate)

        ## Profit of the host once the new VM is placed on it
        profit_after = calculate_host_profit(host_vms, nr_vcpus_host)

        ## A VM that is alone on the server also pays for opening the server
        if len(host_vms) == 1:
            profit_after = profit_after - SERVER_COST

        profit_diff = profit_after - profit_before

        LOG.info("New profit: %.2f Previous profit: %.2f Difference: %.2f",
                 profit_after, profit_before, profit_diff)
        LOG.info("=======================================================>")

        return profit_diff
class ActicloudSensitiveWeigher(weights.BaseHostWeigher):
    """Weigher that returns the negated count of sensitive VMs on a host."""

    def __init__(self):
        LOG.info("Hello from Sensitive __init__")
        self.acticlouddb_client = ActiCloudDBClient()

    def _weigh_object(self, host_state, weight_properties):
        """Higher weights win.

        Returns 0 when the database reports no count (None) for the host,
        otherwise ``float(-nr_sensitive)``.
        """
        hostname = host_state.nodename
        nr_sensitive = self.acticlouddb_client.get_nr_sensitive_vms_by_hostname(
            hostname)

        ## The mapping key must match the placeholder name, and %s is used
        ## instead of %d because nr_sensitive may be None.
        LOG.info(
            "Executing acticloud sensitive weigher for host %(host)s %(nr_sensitive)s",
            {
                'host': hostname,
                'nr_sensitive': nr_sensitive
            })

        if nr_sensitive is None:
            return 0
        return float(-nr_sensitive)
class VmMessagesMonitor():
    """Accepts TCP connections carrying one JSON event each and stores them.

    Recognised events ("heartbeat",
    "acticloud-external-openstack-filter-profit-report" and
    "internal-profit-report") are written through the ActiCloudDB client;
    other messages are logged and otherwise ignored.
    """

    def __init__(self, host=HOST_DEFAULT, port=PORT_DEFAULT):
        self.host = host
        self.port = port
        ## Set to 1 by stop_monitor_thread() to end the accept loop
        self.monitor_thread_done = 0
        self._setup_logger()
        self.actidb_client = ActiCloudDBClient()

    def _setup_logger(self):
        self.logger = logging.getLogger(__name__)

    def _read_message(self, conn):
        """Read from conn until the peer closes and return the decoded text."""
        chunks = []
        while True:
            new_data = conn.recv(1024)
            if not new_data:
                break
            chunks.append(new_data)
        ## Joining as bytes and decoding once avoids repeated concatenation
        ## and also works where recv() returns bytes rather than str.
        return b"".join(chunks).decode("utf-8")

    def _handle_event(self, json_data):
        """Store one decoded event in the database; unknown events are ignored."""
        if "event" not in json_data:
            return
        event = json_data["event"]
        if event == "heartbeat":
            bench_name = json_data["bench"].replace("-to-completion", "")
            nr_vcpus = json_data["vcpus"]
            vm_uuid = json_data["vm_uuid"]
            timestamp = json_data["time"]
            output = json_data["output"]
            (performance, _unit) = benchmarks.bench_get_perf_from_output(
                bench_name, 1, output)
            self.actidb_client.insert_heartbeat(vm_uuid, timestamp,
                                                performance, bench_name,
                                                nr_vcpus)
        elif event == "acticloud-external-openstack-filter-profit-report":
            self.actidb_client.insert_external_profit_report(
                json_data["hostname"], json_data["time"],
                json_data["new-vm-uuid"], json_data["profit-before"],
                json_data["profit-after"], json_data["profit-diff"])
        elif event == "internal-profit-report":
            self.actidb_client.insert_internal_profit_report(
                json_data["hostname"], json_data["time"],
                json_data["profit-value"])

    def _monitor_thread_function(self, s):
        """Accept loop: handle one message per connection until asked to stop.

        s: listening socket; its timeout lets the loop re-check the stop flag.
        """
        while not self.monitor_thread_done:
            try:
                conn, _addr = s.accept()
            except socket.timeout:
                ## Expected: nothing connected within the socket timeout
                continue
            except Exception:
                ## Stay best-effort, but do not hide the failure
                self.logger.exception(
                    "Failed to accept a connection on the monitor socket.")
                continue

            try:
                conn.settimeout(10)
                data = self._read_message(conn)
                self.logger.info("EVENT: %s", data.strip())
                self._handle_event(json.loads(data))
            except Exception:
                print(traceback.format_exc())
                self.logger.error(
                    "Something went wrong when reading data from the socket."
                )
            finally:
                conn.close()

    def spawn_monitor_thread(self):
        """Bind the listening socket (up to 10 attempts) and start the thread.

        Exits the process with status 1 when every attempt fails.
        """
        retries = 10
        self.logger.info("Initializing monitor thread at port: %d", self.port)
        while retries > 0:
            sock = None
            try:
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                sock.bind((self.host, self.port))
                sock.listen(1000)
                sock.settimeout(5)
                self.socket = sock
                break
            except socket.error:
                ## Do not leak the half-initialised socket between retries
                if sock is not None:
                    sock.close()
                self.logger.info("Socket initialization error. Retrying")
                time.sleep(10)
                retries -= 1

        if retries == 0:
            self.logger.info("Failed to initiliaze socket. Exiting")
            sys.exit(1)

        self.monitor_thread = threading.Thread(
            target=self._monitor_thread_function, args=(self.socket, ))
        self.monitor_thread.start()

    def stop_monitor_thread(self):
        """Ask the accept loop to stop, wait for it, then close the socket."""
        self.monitor_thread_done = 1
        self.monitor_thread.join()
        self.socket.close()
# Script: walks the perf output files given on the command line.
# NOTE(review): only the beginning of this script is visible here; the body
# of the per-line loop at the bottom appears to be truncated.
import os, sys, json, datetime
sys.path.append('../')
from acticloudDB import ActiCloudDBClient

# At least one perf output file is required.
if (len(sys.argv) < 2):
    print "usage: %s <perf_output_files ...>" % sys.argv[0]
    sys.exit(1)

actiDB = ActiCloudDBClient()

# perf event names -- presumably the metrics expected in each file; their
# use is not visible in this view.
perf_metrics = ['branches', 'branch-misses', 'cycles', 'instructions',
                'context-switches', 'cpu-migrations', 'page-faults',
                'LLC-loads', 'LLC-load-misses', 'dTLB-loads',
                'dTLB-load-misses', 'mem-loads', 'mem-stores' ]

# Base timestamp: 30 minutes before now (UTC).
starting_time = datetime.datetime.utcnow() - datetime.timedelta(minutes=30)
total_files = len(sys.argv[1:])
for i, filename in enumerate(sys.argv[1:]):
    print "File %d of %d" % (i+1, total_files)
    # The basename is split on '-': the second token is taken as the benchmark
    # name, the third as "<N>vcpus.perf".
    tokens = os.path.basename(filename).split('-')
    bench_name = tokens[1]
    nr_vcpus = int(tokens[2].replace("vcpus.perf", ""))
    time = starting_time
    fp = open(filename)
    line = fp.readline()
    while line:
        # NOTE(review): the rest of this loop body (including the read of the
        # next line) is not visible in this view.
        tokens = line.split()
"""Train a healthy-state model per perf output file and store it in the DB."""
import os
import re
import sys


def parse_perf_filename(filename):
    """Return ``(bench_name, nr_vcpus)`` encoded in a perf output file name.

    The basename is split on '-'. The first two tokens, joined by '-' and with
    '_' replaced by '-', form the benchmark name; the leading digits of the
    third token are the vcpu count (all of them, so "16vcpus.perf" yields 16
    rather than 1).

    Raises IndexError when the name has fewer than three '-' separated tokens
    and ValueError when the third token does not start with a digit.
    """
    tokens = os.path.basename(filename).split('-')
    vcpus_token = tokens[2]
    digits = re.match(r"\d+", vcpus_token)
    if digits is None:
        raise ValueError("no vcpu count in file name: %s" % filename)
    nr_vcpus = int(digits.group(0))
    bench_name = (tokens[0] + "-" + tokens[1]).replace("_", "-")
    return bench_name, nr_vcpus


def main(argv):
    """Train and insert one model per file in argv[1:]; return an exit status."""
    if len(argv) < 2:
        print("usage: %s <perf_output_files>" % argv[0])
        return 1

    ## Project modules live outside this directory; import them only when the
    ## script is actually run.
    sys.path.append('../')
    sys.path.append('../../actimanager/')
    from acticloudDB import ActiCloudDBClient
    import HealthyStateModel

    actiDB = ActiCloudDBClient()
    for filename in argv[1:]:
        bench_name, nr_vcpus = parse_perf_filename(filename)
        model = HealthyStateModel.model_train(filename)
        actiDB.insert_model(bench_name, nr_vcpus, model)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
class InterferenceDetector():
    """Collects perf output for monitored VMs and tests it against a model.

    One perf thread is spawned per monitored VM; its output is written to a
    per-VM file that check_interference() feeds to HealthyStateModel.
    """

    def __init__(self, hostname):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.hostname = hostname

        ## Initialize the acticloudDB client
        self.acticloudDB_client = ActiCloudDBClient()

        ## Openstack client to get the necessary information when needed
        self.openstack_client = OpenstackClient()

        ## VMs that are being monitored, keyed by the VM object
        self.monitored_vms = dict()

        ## Spawned perf threads
        self.perf_threads = []

    # vm: openstack VM object
    def check_interference(self, vm):
        """Return the model's verdict for vm, or False when it cannot be run.

        False is returned when no model exists for the VM or when its perf
        output file is missing.
        """
        ## Find (if exists) the model of the current benchmark
        model = self._get_model(vm)

        ## If no model exists in our database, we don't know about interference
        if model is None:
            return False

        inp_file = self._get_perf_output_filename(vm)
        if not os.path.isfile(inp_file):
            self.logger.error(
                "Could not find a current perf output file for VM %s", vm.id)
            return False

        ## Everything is in place, check for interference
        (num_of_clusters, train_axis, train_labels, trained_model, dev,
         train_metr, pca, scaler1, scaler2) = model
        return HealthyStateModel.model_test_dy(
            train_axis, inp_file, num_of_clusters, train_labels,
            trained_model, dev, train_metr, pca, scaler1, scaler2)

    # vm: openstack VM object
    def add_vm(self, vm):
        """Start monitoring vm; a VM that is already monitored is left as is."""
        if vm in self.monitored_vms:
            self.logger.info("VM %s is already being monitored", vm.id)
            return
        self.logger.info("Adding VM %s", vm.id)
        self.monitored_vms[vm] = dict()

    # vm: openstack VM object
    def remove_vm(self, vm):
        """Stop monitoring vm; an unknown VM is only logged."""
        if vm not in self.monitored_vms:
            self.logger.info("VM %s is not being monitored", vm.id)
            return
        self.logger.info("Removing VM %s", vm.id)
        del self.monitored_vms[vm]

    def remove_vm_by_uuid(self, vm_uuid):
        """Stop monitoring the VM whose id equals vm_uuid, if there is one."""
        for vm in self.monitored_vms:
            if vm_uuid == vm.id:
                ## Returning right away keeps the deletion safe even though
                ## the dict is being iterated.
                self.remove_vm(vm)
                return
        self.logger.info("VM with UUID=%s not found in monitored VMs", vm_uuid)

    def remove_all_vms(self):
        self.monitored_vms = dict()

    ## Spawns a perf_thread per monitored VM
    def start_perf_threads(self):
        for vm in self.monitored_vms:
            vm_uuid = vm.id
            vm_pid = self._get_pid_from_vm_id(vm)
            if vm_pid == -1:
                self.logger.info("Could not get the PID of VM %s", vm_uuid)
                continue
            t = threading.Thread(target=self._perf_thread_function,
                                 args=(vm, vm_uuid, vm_pid,))
            t.start()
            self.perf_threads.append(t)

    def stop_perf_threads(self):
        """Wait for every spawned perf thread and forget the finished ones."""
        for t in self.perf_threads:
            t.join()
        ## Without this the list keeps growing across start/stop cycles
        self.perf_threads = []

    # vm: openstack VM object
    def _get_model(self, vm):
        """Return the stored model for vm's benchmark, or None."""
        tokens = vm.name.split("-")
        if len(tokens) < 4:
            ## Could not get benchmark name, no model available
            model = None
        else:
            ## FIXME this is not correct for stress
            bench_name = tokens[2] + "-" + tokens[3]
            nr_vcpus = int(
                self.openstack_client.get_flavor_by_id(vm.flavor["id"]).vcpus)
            model = self.acticloudDB_client.get_model(bench_name, nr_vcpus)

        if model is None:
            self.logger.info("Healthy state model NOT FOUND for VM %s", vm.id)
        else:
            self.logger.info(
                "Healthy state model FOUND for VM %s (db entry: %s, %d)",
                vm.id, bench_name, nr_vcpus)
        return model

    def _get_perf_output_filename(self, vm):
        """Return the path of the perf output file that belongs to vm."""
        return perf_outputs_dir + "/" + vm.id + ".perf"

    def _write_perf_output_to_file(self, vm, vm_uuid, output):
        """Overwrite vm's perf output file with output."""
        output_file = self._get_perf_output_filename(vm)
        ## "with" closes the file even when the write fails
        with open(output_file, "w") as f:
            f.write(output)

    def _perf_thread_function(self, vm, vm_uuid, vm_pid):
        """Run perf for one VM and keep its output in memory and on disk."""
        self.logger.info("Starting perf command for VM %s", vm_uuid)
        perf_cmd = ("ssh %(hostname)s perf kvm --guest stat -e %(events)s " + \
                    "-I %(interval)s -p %(pid)d sleep %(runtime)d") % \
                   {'hostname': self.hostname, 'events': perf_events,
                    'interval': perf_interval, 'pid': vm_pid,
                    'runtime': perf_window}
        self.logger.info("Running perf command for VM %s (PID: %d)", vm.id,
                         vm_pid)
        status, output = commands.getstatusoutput(perf_cmd)
        if status != 0:
            self.logger.info("Something went wrong with the perf command: %s",
                             output)
            return

        ## The VM may have been removed while perf was running
        vm_entry = self.monitored_vms.get(vm)
        if vm_entry is None:
            self.logger.info("VM %s is not being monitored", vm.id)
            return
        vm_entry['last_perf_output'] = output
        self._write_perf_output_to_file(vm, vm_uuid, output)

    ## vm: openstack VM object
    ## returns pid: int, on error -1 is returned
    def _get_pid_from_vm_id(self, vm):
        try:
            libvirt_instance_name = getattr(vm,
                                            'OS-EXT-SRV-ATTR:instance_name')
            command = "ssh %s ps -ef | grep %s | grep \"qemu-system\" | grep -v grep | awk '{print $2}'" % (self.hostname, libvirt_instance_name)
            pid = commands.getoutput(command)
            return int(pid)
        except Exception:
            ## Missing attribute, failed command or non-numeric output
            return -1
def __init__(self):
    """Log a greeting and create the ACTiCLOUD database client."""
    LOG.info("Hello from Gold __init__")
    db_client = ActiCloudDBClient()
    self.acticlouddb_client = db_client
# Script: scans an executor output file for heartbeat events.
# NOTE(review): only the beginning of this script is visible here; the loop
# body appears to be truncated after the duplicate check, and the nesting
# below is reconstructed from the statement order -- confirm against the
# original file.
import os, sys, json
sys.path.append('../')
sys.path.append('../../workload/')
from acticloudDB import ActiCloudDBClient
import benchmarks

# Exactly one argument (the executor output file) is required.
if (len(sys.argv) != 2):
    print "usage: %s <executor_output_file>" % sys.argv[0]
    sys.exit(1)

actiDB = ActiCloudDBClient()
filename = sys.argv[1]
fp = open(filename)
# (bench_name, nr_vcpus) pairs that were already handled
already_in = dict()
line = fp.readline()
while line:
    # NOTE(review): the line read before the loop is replaced here before it
    # is examined, so the first line of the file is never inspected.
    line = fp.readline()
    if "EVENT" in line and "heartbeat" in line:
        # The third ' - ' separated field holds "EVENT: <json>".
        tokens = line.split(' - ')
        event_output = tokens[2].replace("EVENT: ", "")
        json_data = json.loads(event_output)
        nr_vcpus = json_data['vcpus']
        bench_name = json_data['bench']
        # stress-cpu runs are distinguished by their load value
        if bench_name == "stress-cpu":
            bench_name += "-" + str(json_data['load'])
        bench_name = bench_name.replace("-to-completion", "")
        if (bench_name, nr_vcpus) in already_in:
            continue
class InformationAggregator():
    """Per-compute-node facade over the DB, OpenStack and libvirt helpers."""

    def __init__(self, compute_node_name, system_type):
        """Pick the system implementation for system_type and create clients.

        compute_node_name: stored as ``hostname`` and ``compute_node_name``.
        system_type: "actistatic", "actifull", "gps" or "gno"; any other
            value is logged as an error and the process exits with status 1.
        """
        global INTERFERENCE_DETECTION_ENABLED
        self.logger = logging.getLogger(self.__class__.__name__)
        self.hostname = compute_node_name
        self.compute_node_name = compute_node_name
        self.system_type = system_type
        if system_type == "actistatic" or system_type == "actifull":
            if system_type == "actifull":
                INTERFERENCE_DETECTION_ENABLED = True
            self.system = ActiManagerSystem([2, 1, 10], compute_node_name)
        elif system_type == "gps":
            self.system = GoldPerSocketSystem([2, 1, 10], compute_node_name)
        elif system_type == "gno":
            self.system = GoldNotOversubscribedSystem([2, 1, 10],
                                                      compute_node_name)
        else:
            self.logger.error("Wrong system_type given: %s", system_type)
            sys.exit(1)
        self.acticlouddb_client = ActiCloudDBClient()
        self.openstack_client = OpenstackClient()

    def get_system(self):
        return self.system

    def report_events(self):
        """Log profit and esd reports; only for "acti*" system types."""
        if "acti" in self.system_type:
            event_logger.log_event({
                'event': 'internal-profit-report',
                'profit-value': self.system.cur_profit,
                'hostname': self.hostname
            })
            esd_dict = dict()
            for vm in self.system.vms:
                esd_dict[vm] = [v.esd for v in self.system.vms[vm].vcpus]
            event_logger.log_event({
                'event': 'internal-esd-report',
                'values': esd_dict,
                'hostname': self.hostname
            })

    def get_pid_from_vm_id(self, vm):
        """Return the output of the shell pipeline that looks up vm's PID."""
        command = "nova show %s | grep -i 'OS-EXT-SRV-ATTR:instance_name' | awk -F'|' '{print $3}'" % str(
            vm.id)
        libvirt_instance_name = shell_command.run_shell_command(
            command).strip()
        command = "ssh %s ps -ef | grep %s | grep \"qemu-system\" | grep -v grep | awk '{print $2}'" % (
            self.hostname, libvirt_instance_name)
        pid = shell_command.run_shell_command(command).strip()
        return pid

    def get_dst_numa_node_from_pcpu(self, pcpu_id):
        """Return the NUMA node that contains pcpu_id, or None if none does."""
        #module numa has not implemented numa_node_of_cpu() call of numa(3) library
        for i in range(0, numa.get_max_node() + 1):
            if pcpu_id in numa.node_to_cpus(i):
                return i
        return None

    def _build_pcpu_mapping(self):
        """Return the per-physical-cpu load list for this compute node.

        Each VM adds 10 (gold) or 1 (silver) to every index whose entry in
        its vcpu mapping is truthy. VMs whose mapping could not be obtained
        are skipped instead of crashing the whole computation.
        """
        weighted_mappings = []
        for vm in self.getServerListByComputeNode(self.compute_node_name):
            vm_mapping = self.getVMVcpuMapping(vm)
            if vm_mapping is None:
                continue
            weight = 10 if self.is_gold_vm(vm) else 1
            weighted_mappings.append((vm_mapping, weight))

        if not weighted_mappings:
            return []

        ## Size the result by the longest mapping so no index is out of range
        p_mapping = [0] * max(len(m) for m, _ in weighted_mappings)
        for vm_mapping, weight in weighted_mappings:
            for i, is_mapped in enumerate(vm_mapping):
                if is_mapped:
                    p_mapping[i] += weight
        return p_mapping

    def print_vcpu_to_pcpu_mapping(self):
        """Log the per-physical-cpu load list of this compute node."""
        self.logger.info("Physical CPU mapping: %s", self._build_pcpu_mapping())

    def is_noisy_vm(self, vm):
        return self.acticlouddb_client.is_noisy_vm(vm.id)

    def is_sensitive_vm(self, vm):
        return self.acticlouddb_client.is_sensitive_vm(vm.id)

    def is_gold_vm(self, vm):
        return self.acticlouddb_client.is_gold_vm(vm.id, 1)

    def get_vm_nr_vcpus(self, vm):
        return self.acticlouddb_client.get_nr_vcpus(vm.id)

    def get_cost_function(self, vm):
        return self.acticlouddb_client.cost_function(vm.id)

    def get_cpu_util(self, vm):
        ## FIXME constant for now; the value is meant to come from
        ## self.acticlouddb_client.cpu_util(vm.id)
        return 1.0

    def update_moves(self, vm):  # vm: Vm class instance
        """Add vm.moves to the move counter stored in the DB for vm."""
        prev_moves = self.acticlouddb_client.get_moves(vm.id)
        self.acticlouddb_client.set_moves(vm.id, prev_moves + vm.moves)

    def getServerListByComputeNode(self, computeNode):
        return self.openstack_client.get_vms_by_hostname(computeNode)

    def getServerList(self):
        return self.openstack_client.get_vms()

    def getVMVcpuMapping(self, vm):
        """Return vm's vcpu mapping as a list, or None when it is unavailable."""
        try:
            libvirt_instance_name = getattr(vm,
                                            'OS-EXT-SRV-ATTR:instance_name')
            with libvirt_client.LibVirtConnection(self.hostname,
                                                  "qemu+ssh") as libvconn:
                libvinstance = libvirt_client.LibVirtInstance(
                    libvconn, str(libvirt_instance_name))
                return list(libvinstance.get_instance_mapping()[0])
        except Exception:
            self.logger.info(
                "=================> Could not get vcpu mapping of VM %s",
                vm.id)
            return None
# Script: inserts every entry of OPENSTACK_HOSTS into the ACTiCLOUD database.
import os
import sys

sys.path.extend(['../', '../../common'])

from acticloudDB import ActiCloudDBClient
from config import *

db_client = ActiCloudDBClient()

# OPENSTACK_HOSTS is presumably provided by the config star-import.
for host_name in OPENSTACK_HOSTS:
    db_client.insert_host(host_name)
def __init__(self):
    """Log a greeting and create the ACTiCLOUD DB client and the nova client."""
    LOG.info("Hello from ACTICLOUD Filter __init__")
    db_client = ActiCloudDBClient()
    self.acticlouddb_client = db_client
    nova_creds = get_nova_creds()
    self.nova = novaclient.Client("2", **nova_creds)
import os, sys, json sys.path.append('../') from acticloudDB import ActiCloudDBClient if (len(sys.argv) < 2): print "usage: %s [table_names]" % sys.argv[0] sys.exit(1) actiDB = ActiCloudDBClient() for tbl_name in sys.argv[1:]: if (actiDB.clear_data_from_table(tbl_name) == 1): print "Cleared all data from table:", tbl_name else: print "Could not clear table %s, probably it does not exist (available tables: %s)" % \ (tbl_name, actiDB.get_db_table_names())