Example #1
    def __init__(self):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.COMPUTE_NODES = [
            'acticloud1', 'acticloud2', 'acticloud3', 'acticloud4'
        ]
        self.acticlouddb_client = ActiCloudDBClient()
        self.openstack_client = OpenstackClient()
Example #2
    def __init__(self, poll_period):
        self.logger = logging.getLogger(self.__class__.__name__)

        ## The queue where messages from internals are stored
        self.messages = []

        self.openstack_client = OpenstackClient()

        self.information_aggregator = InformationAggregator()
        self.poll_period = poll_period
Example #3
	def __init__(self, hostname):
		self.logger = logging.getLogger(self.__class__.__name__)
		self.hostname = hostname

		## Initialize the acticloudDB client
		self.acticloudDB_client = ActiCloudDBClient()

		## Openstack client to get the necessary information when needed
		self.openstack_client = OpenstackClient()

		## VMs that are being monitored, indexed by uuid
		self.monitored_vms = dict()

		## Spawned perf threads
		self.perf_threads = []
Example #4
class InformationAggregator():
    def __init__(self):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.COMPUTE_NODES = [
            'acticloud1', 'acticloud2', 'acticloud3', 'acticloud4'
        ]
        self.acticlouddb_client = ActiCloudDBClient()
        self.openstack_client = OpenstackClient()

    def is_gold_vm(self, vmid):
        return self.acticlouddb_client.is_gold_vm(vmid, 1)

    def get_vm_nr_vcpus(self, vmid):
        return self.acticlouddb_client.get_nr_vcpus(vmid)

    def get_vm_current_host(self, vmid):
        return self.openstack_client.get_vm_current_host(vmid)

    def getServerListByComputeNode(self, computeNode):
        return self.openstack_client.get_vms_by_hostname(computeNode)

    def getGoldServerListByComputeNode(self, computeNode):
        all_servers = self.getServerListByComputeNode(computeNode)
        gold_servers = [x for x in all_servers if self.is_gold_vm(x)]
        return gold_servers

    def getSilverServerListByComputeNode(self, computeNode):
        all_servers = self.getServerListByComputeNode(computeNode)
        silver_servers = [x for x in all_servers if not self.is_gold_vm(x)]
        return silver_servers

    def getVcpusByComputeNode(self, computeNode):
        gold_vcpus = self.acticlouddb_client.get_nr_gold_vcpus_by_hostname(
            computeNode)
        silver_vcpus = self.acticlouddb_client.get_nr_silver_vcpus_by_hostname(
            computeNode)
        return (gold_vcpus, silver_vcpus)

    def getServerList(self):
        return self.openstack_client.get_vms()

    def getComputeNodes(self):
        return list(self.COMPUTE_NODES)
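
A minimal usage sketch (not part of the source) of how a scheduler might query this aggregator; it assumes the services behind ActiCloudDBClient and OpenstackClient are reachable:

## Sketch only: the aggregator just proxies the DB and Openstack clients
aggregator = InformationAggregator()
for node in aggregator.getComputeNodes():
    ## getVcpusByComputeNode returns a (gold, silver) tuple per node
    gold_vcpus, silver_vcpus = aggregator.getVcpusByComputeNode(node)
    print("%s: %d gold / %d silver vcpus" % (node, gold_vcpus, silver_vcpus))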
Example #5
	def __init__(self, compute_node_name, system_type):
		global INTERFERENCE_DETECTION_ENABLED
		self.logger = logging.getLogger(self.__class__.__name__)
		self.hostname = compute_node_name

		self.compute_node_name = compute_node_name

		self.system_type = system_type
		if system_type == "actistatic" or system_type == "actifull":
			if system_type == "actifull":
				INTERFERENCE_DETECTION_ENABLED = True
			self.system = ActiManagerSystem([2, 1, 10], compute_node_name)
		elif system_type == "gps":
			self.system = GoldPerSocketSystem([2, 1, 10], compute_node_name)
		elif system_type == "gno":
			self.system = GoldNotOversubscribedSystem([2, 1, 10], compute_node_name)
		else:
			self.logger.error("Wrong system_type given: %s", system_type)
			sys.exit(1)

		self.acticlouddb_client = ActiCloudDBClient()
		self.openstack_client = OpenstackClient()
		return
Example #6
class InformationAggregator():
    def __init__(self, compute_node_name, system_type):
        global INTERFERENCE_DETECTION_ENABLED
        self.logger = logging.getLogger(self.__class__.__name__)
        self.hostname = compute_node_name

        self.compute_node_name = compute_node_name

        self.system_type = system_type
        if system_type == "actistatic" or system_type == "actifull":
            if system_type == "actifull":
                INTERFERENCE_DETECTION_ENABLED = True
            self.system = ActiManagerSystem([2, 1, 10], compute_node_name)
        elif system_type == "gps":
            self.system = GoldPerSocketSystem([2, 1, 10], compute_node_name)
        elif system_type == "gno":
            self.system = GoldNotOversubscribedSystem([2, 1, 10],
                                                      compute_node_name)
        else:
            self.logger.error("Wrong system_type given: %s", system_type)
            sys.exit(1)

        self.acticlouddb_client = ActiCloudDBClient()
        self.openstack_client = OpenstackClient()
        return

    def get_system(self):
        return self.system

    def report_events(self):
        if ("acti" in self.system_type):
            event_logger.log_event({
                'event': 'internal-profit-report',
                'profit-value': self.system.cur_profit,
                'hostname': self.hostname
            })
            esd_dict = dict()
            for vm in self.system.vms:
                esd_dict[vm] = [v.esd for v in self.system.vms[vm].vcpus]
            event_logger.log_event({
                'event': 'internal-esd-report',
                'values': esd_dict,
                'hostname': self.hostname
            })

    def get_pid_from_vm_id(self, vm):
        command = "nova show %s | grep -i 'OS-EXT-SRV-ATTR:instance_name' | awk -F'|' '{print $3}'" % str(
            vm.id)
        libvirt_instance_name = shell_command.run_shell_command(
            command).strip()
        command = "ssh %s ps -ef | grep %s | grep \"qemu-system\" | grep -v grep | awk '{print $2}'" % (
            self.hostname, libvirt_instance_name)
        pid = shell_command.run_shell_command(command).strip()
        return pid

    def get_dst_numa_node_from_pcpu(self, pcpu_id):
        # The numa module has not implemented the numa_node_of_cpu() call of the numa(3) library
        for i in range(0, numa.get_max_node() + 1):
            if pcpu_id in numa.node_to_cpus(i):
                return i

    def print_vcpu_to_pcpu_mapping(self):
        p_mapping = []
        for vm in self.getServerListByComputeNode(self.compute_node_name):
            vm_mapping = self.getVMVcpuMapping(vm)
            for i in range(len(vm_mapping)):
                p_mapping.append(0)
            break
        for vm in self.getServerListByComputeNode(self.compute_node_name):
            vm_mapping = self.getVMVcpuMapping(vm)
            for i in range(len(vm_mapping)):
                if vm_mapping[i]:
                    if self.is_gold_vm(vm):
                        p_mapping[i] += 10
                    else:
                        p_mapping[i] += 1
        self.logger.info("Physical CPU mapping: %s" % p_mapping)
        return

    def is_noisy_vm(self, vm):
        is_noisy = self.acticlouddb_client.is_noisy_vm(vm.id)
        return is_noisy

    def is_sensitive_vm(self, vm):
        is_sensitive = self.acticlouddb_client.is_sensitive_vm(vm.id)
        return is_sensitive

    def is_gold_vm(self, vm):
        is_gold = self.acticlouddb_client.is_gold_vm(vm.id, 1)
        return is_gold

    def get_vm_nr_vcpus(self, vm):
        return self.acticlouddb_client.get_nr_vcpus(vm.id)

    def get_cost_function(self, vm):
        cost_function = self.acticlouddb_client.cost_function(vm.id)
        return cost_function

    def get_cpu_util(self, vm):
        return 1.0  # FIXME
        cpu_util = self.acticlouddb_client.cpu_util(vm.id)
        return cpu_util

    def update_moves(self, vm):  # vm: Vm class instance
        prev_moves = self.acticlouddb_client.get_moves(vm.id)
        self.acticlouddb_client.set_moves(vm.id, prev_moves + vm.moves)

    def getServerListByComputeNode(self, computeNode):
        return self.openstack_client.get_vms_by_hostname(computeNode)

    def getServerList(self):
        return self.openstack_client.get_vms()

    def getVMVcpuMapping(self, vm):
        try:
            libvirt_instance_name = getattr(vm,
                                            'OS-EXT-SRV-ATTR:instance_name')
            with libvirt_client.LibVirtConnection(self.hostname,
                                                  "qemu+ssh") as libvconn:
                libvinstance = libvirt_client.LibVirtInstance(
                    libvconn, str(libvirt_instance_name))
                return list(libvinstance.get_instance_mapping()[0])
        except Exception:
            self.logger.info(
                "=================> Could not get vcpu mapping of VM %s",
                vm.id)
            return None
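
The weighting used by print_vcpu_to_pcpu_mapping (10 per pinned gold vCPU, 1 per silver) can be shown on plain data; a self-contained sketch with hypothetical pin masks, not real VM state:

def pcpu_scores(vms):
    ## vms: list of (is_gold, pin_mask); pin_mask[i] is truthy when a vCPU
    ## of that VM is pinned to physical CPU i
    scores = [0] * len(vms[0][1])
    for is_gold, mask in vms:
        for i, pinned in enumerate(mask):
            if pinned:
                scores[i] += 10 if is_gold else 1
    return scores

print(pcpu_scores([(True, [1, 1, 0, 0]), (False, [0, 1, 1, 1])]))
## -> [10, 11, 1, 1]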
Example #7
import logging
import sys
import time

from benchmarks import *

sys.path.append('../common/')
from openstack_client import OpenstackClient
import event_logger

## Setup the logging facility
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(message)s',
                    datefmt='%Y-%m-%d.%H:%M:%S')
logging.Formatter.converter = time.gmtime
logger = logging.getLogger("executor")

## Initialize Openstack client
ost_client = OpenstackClient()


def get_image_by_bench(bench):
    img_name = bench['openstack_image']
    for img in ost_client.get_images():
        if img_name in img.name:
            return img


def spawn_vm(seq_num, vm_chars, wait_until_finished):
    '''
    Spawns a VM and returns 0 if an error has occurred.
    '''
    vcpus = vm_chars['nr_vcpus']
    is_gold = vm_chars['is_gold']
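
get_image_by_bench matches by substring against the image names known to the Openstack client and implicitly returns None when nothing matches; a hedged usage sketch with a made-up benchmark entry (real entries come from the benchmarks module):

## Hypothetical benchmark dict; only the 'openstack_image' key is used here
bench = {'openstack_image': 'spec-sphinx3'}
img = get_image_by_bench(bench)
if img is None:
    logger.error("No Openstack image matches %s", bench['openstack_image'])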
Example #8
class ACTiManagerExternal():
    def __init__(self, poll_period):
        self.logger = logging.getLogger(self.__class__.__name__)

        ## The queue where messages from internals are stored
        self.messages = []

        self.openstack_client = OpenstackClient()

        self.information_aggregator = InformationAggregator()
        self.poll_period = poll_period

    def start_rabbitmq_thread(self):
        self.logger.info("Starting rabbitmq consumer thread")
        self.rabbitmq_thread_done = 0
        self.rabbitmq_thread = threading.Thread(
            target=self.rabbitmq_consumer_thread)
        self.rabbitmq_thread.start()

    def stop_rabbitmq_thread(self):
        self.logger.info("Stopping rabbitmq consumer thread")
        self.rabbitmq_thread_done = 1
        self.rabbitmq_thread.join()

    def rabbitmq_consumer_thread(self):
        pika_creds = pika.credentials.PlainCredentials(RABBITMQ_USERNAME,
                                                       RABBITMQ_PASSWORD)
        pika_cp = pika.ConnectionParameters(RABBITMQ_IP,
                                            credentials=pika_creds)
        connection = pika.BlockingConnection(pika_cp)
        channel = connection.channel()
        channel.queue_declare(queue=RABBITMQ_ACTI_QUEUE_NAME)
        for message in channel.consume(RABBITMQ_ACTI_QUEUE_NAME,
                                       inactivity_timeout=5,
                                       auto_ack=True):
            if self.rabbitmq_thread_done:
                break
            if message == (None, None, None):
                continue
            method, properties, body = message
            self.logger.info("Received the following message: %s", body)
            self.messages.append(body)

    def check_internal_notification(self):
        if (len(self.messages) == 0):
            return None
        else:
            msg = self.messages[0]
            self.messages = self.messages[1:]
            return msg

    def doMigrate(self, vm_uuid, dst):
        self.openstack_client.live_migrate_vm(vm_uuid, dst)

    def find_server_overload_dst(self, vmid):
        '''
        Returns the most appropriate destination host for the given VM.
        By "most appropriate" we mean:
        1. Not the VM's current host
        2. The least loaded (in vcpus) host
        3. On a tie, the host with the fewest gold vcpus
        4. On a second tie, we just return the first of the remaining hosts
        '''
        self.logger.info("Starting find_server_overload_dst(%s)", vmid)
        servers = self.information_aggregator.getComputeNodes()

        ## Remove vm's current host
        current_host = self.information_aggregator.get_vm_current_host(vmid)
        if (current_host in servers):
            servers.remove(current_host)
        else:
            ## Probably current host is None and the VM is deleted
            self.logger.info("Current host %s not in servers list %s",
                             current_host, servers)
            return None

        ## vcpus_per_server is a list of tuples with the following scheme:
        ##  [ ("acticloud1", nr_total_vcpus, nr_gold_vcpus), (...), ... ]
        vcpus_per_server = []
        for server in servers:
            (gold_vcpus, silver_vcpus
             ) = self.information_aggregator.getVcpusByComputeNode(server)
            vcpus_per_server.append(
                (server, gold_vcpus + silver_vcpus, gold_vcpus))

        ## Sort by nr_total_vcpus
        vcpus_per_server.sort(key=lambda x: x[1])

        ## Keep only those with the min nr_total_vcpus
        vcpus_per_server = [
            x for x in vcpus_per_server if x[1] == vcpus_per_server[0][1]
        ]
        self.logger.info("vcpus_per_server: %s", vcpus_per_server)

        ## Now sort by nr_gold_vcpus
        vcpus_per_server.sort(key=lambda x: x[2])

        ## We now have our destination
        dst = vcpus_per_server[0][0]
        dst_gold_vcpus = vcpus_per_server[0][2]
        dst_silver_vcpus = vcpus_per_server[0][1] - dst_gold_vcpus

        ## Finally, check if the given VM can fit in the destination host
        vm_vcpus = self.information_aggregator.get_vm_nr_vcpus(vmid)
        if self.information_aggregator.is_gold_vm(vmid):
            if vm_vcpus <= (float(pcpus - dst_gold_vcpus) -
                            float(dst_silver_vcpus) / os_limit):
                return dst
        else:
            if vm_vcpus <= (
                (pcpus - dst_gold_vcpus) * os_limit - dst_silver_vcpus):
                return dst

        ## The VM does not fit in the destination host
        return None

    def handle_server_overload(self, events):
        servers = self.information_aggregator.getComputeNodes()
        ol_events = events['SERVER_OVERLOAD']
        messages = [0 for s in servers]
        for (host, vmid) in ol_events:
            ## If the VM's current host differs from the one that sent the
            ## message, the VM has probably already been migrated
            current_host = self.information_aggregator.get_vm_current_host(
                vmid)
            if (current_host != host):
                continue
            messages[servers.index(host)] += 1
        ol_servers = sum([int(bool(x)) for x in messages])

        overload_pct = float(ol_servers) / len(servers)
        handled = [False for s in servers]
        actions = 0
        if overload_pct < overload_threshold1:
            actions = ol_servers
        elif overload_pct < overload_threshold2:
            actions = ol_servers // 2
        else:
            # complain to remote cloud
            return

        for (host, vmid) in ol_events:
            if not actions:
                break
            if not handled[servers.index(host)]:
                actions -= 1
                dst = self.find_server_overload_dst(vmid)
                if dst is None:
                    ## FIXME nowhere to put the new VM, possibly pause it??
                    continue
                self.doMigrate(vmid, dst)
                handled[servers.index(host)] = True

    def execute(self):
        while True:
            events = dict()
            events["SERVER_OVERLOAD"] = []
            events["INTERFERENCE"] = []
            message = self.check_internal_notification()
            has_overload = False
            while message is not None:
                self.logger.info("GOT MESSAGE: %s", message)
                message_type = message.split(' ')[0]
                if message_type == "SERVER_OVERLOAD":
                    hostname = message.split(' ')[1]
                    hint_vmid = message.split(' ')[2]
                    events[message_type].append((hostname, hint_vmid))
                    has_overload = True
                elif message_type == "INTERFERENCE":
                    vmid = message.split()[1]
                    dst = self.find_server_overload_dst(vmid)
                    self.logger.info(
                        "Handling interference of VM %s by migrating to host %s",
                        vmid, dst)
                    if dst is not None:
                        self.doMigrate(vmid, dst)
                    else:
                        self.logger.info(
                            "Could not find a valid destination host for VM %s",
                            vmid)
                else:
                    pass
                message = self.check_internal_notification()

            if has_overload:
                self.handle_server_overload(events)

            self.logger.info("Sleeping for DP=%d seconds" % self.poll_period)
            time.sleep(self.poll_period)
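
The destination-selection policy inside find_server_overload_dst (least total vcpus first, then fewest gold vcpus) can be isolated into a pure function; a sketch over made-up (hostname, total_vcpus, gold_vcpus) tuples, leaving out the final capacity check:

def pick_destination(candidates):
    ## Keep the least loaded hosts, then prefer the fewest gold vcpus
    min_total = min(c[1] for c in candidates)
    least_loaded = [c for c in candidates if c[1] == min_total]
    return min(least_loaded, key=lambda c: c[2])[0]

hosts = [("acticloud2", 12, 4), ("acticloud3", 8, 6), ("acticloud4", 8, 2)]
print(pick_destination(hosts))  ## -> "acticloud4"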
Example #9
from oslo_config import cfg

from openstack_client import OpenstackClient
# NOTE: OneViewClient is assumed to be importable from a sibling module

opts = [
    cfg.StrOpt('default_deploy_kernel_id',
               help='Deploy kernel image used by the synch mechanism'),
    cfg.StrOpt('default_deploy_ramdisk_id',
               help='Deploy ramdisk image used by the synch mechanism'),
    cfg.StrOpt('default_sync_driver',
               help='Default driver to synch with OneView'),
]

CONF = cfg.CONF
CONF.register_opts(opts, group='ironic')
CONF(default_config_files=['sync.conf'])

os_client = OpenstackClient()
ov_client = OneViewClient()
sh_api = ov_client.server_hardware_api


def get_config_options():
    return CONF


#===============================================================================
# Ironic actions
#===============================================================================

def get_ironic_client():
    return os_client._get_ironic_client()
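
Once registered under the 'ironic' group, the options are read as attributes of CONF; a minimal sketch (the values depend on what sync.conf provides and may be None if unset):

conf = get_config_options()
print(conf.ironic.default_sync_driver)
print(conf.ironic.default_deploy_kernel_id)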
Example #10
class InterferenceDetector():
	def __init__(self, hostname):
		self.logger = logging.getLogger(self.__class__.__name__)
		self.hostname = hostname

		## Initialize the acticloudDB client
		self.acticloudDB_client = ActiCloudDBClient()

		## Openstack client to get the necessary information when needed
		self.openstack_client = OpenstackClient()

		## VMs that are being monitored, indexed by uuid
		self.monitored_vms = dict()

		## Spawned perf threads
		self.perf_threads = []

	# vm: openstack VM object
	def check_interference(self, vm):
		## Find (if exists) the model of the current benchmark
		model = self._get_model(vm)

		## If no model exists in our database, we don't know about interference
		if model is None:
			return False
		
		## If the model was found we know ...
		inp_file = self._get_perf_output_filename(vm)
		if (not os.path.isfile(inp_file)):
			self.logger.error("Could not find a current perf output file for VM %s", vm.id)
			return False

		## Everything is in place, check for interference
		(num_of_clusters, train_axis, train_labels,
		 model, dev, train_metr, pca, scaler1, scaler2) = model
		has_interference = HealthyStateModel.model_test_dy(train_axis, inp_file, \
						num_of_clusters, train_labels, model, dev, train_metr, \
						pca, scaler1, scaler2)
		return has_interference

	# vm: openstack VM object
	def add_vm(self, vm):
		if vm in self.monitored_vms:
			self.logger.info("VM %s is already being monitored", vm.id)
			return

		self.logger.info("Adding VM %s", vm.id)
		self.monitored_vms[vm] = dict()

	# vm: openstack VM object
	def remove_vm(self, vm):
		if vm not in self.monitored_vms:
			self.logger.info("VM %s is not being monitored", vm.id)
			return

		self.logger.info("Removing VM %s", vm.id)
		del self.monitored_vms[vm]

	def remove_vm_by_uuid(self, vm_uuid):
		for vm in self.monitored_vms:
			if vm_uuid == vm.id:
				self.remove_vm(vm)
				return
		self.logger.info("VM with UUID=%s not found in monitored VMs", vm_uuid)

	def remove_all_vms(self):
		self.monitored_vms = dict()

	## Spawns a perf_thread per monitored VM
	def start_perf_threads(self):
		for vm in self.monitored_vms:
			vm_uuid = vm.id
			vm_pid = self._get_pid_from_vm_id(vm)
			if vm_pid == -1:
				self.logger.info("Could not get the PID of VM %s", vm_uuid)
				continue

			t = threading.Thread(target=self._perf_thread_function, args=(vm, vm_uuid, vm_pid,))
			t.start()
			self.perf_threads.append(t)

	def stop_perf_threads(self):
		for t in self.perf_threads:
			t.join()

	# vm: openstack VM object
	def _get_model(self, vm):
		vm_name = vm.name
		tokens = vm_name.split("-")
		if (len(tokens) < 4):
			model = None ## Could not get benchmark name, no model available
		else:
			bench_name = tokens[2] + "-" + tokens[3] ## FIXME this is not correct for stress
			nr_vcpus = int(self.openstack_client.get_flavor_by_id(vm.flavor["id"]).vcpus)
			model = self.acticloudDB_client.get_model(bench_name, nr_vcpus)
		if model is None:
			self.logger.info("Healthy state model NOT FOUND for VM %s", vm.id)
		else:
			self.logger.info("Healthy state model FOUND for VM %s (db entry: %s, %d)",
			                 vm.id, bench_name, nr_vcpus)
		return model
				
	def _get_perf_output_filename(self, vm):
		return perf_outputs_dir + "/" + vm.id + ".perf"
		## FIXME the following are temporary
#		tokens = vm.name.split("-")
#		bench_name = tokens[2]
#		if "spec" in bench_name:
#			bench_name = bench_name + "-" + tokens[3]
#		nr_vcpus = int(openstack_client.get_flavor_by_id(vm.flavor["id"]).vcpus)
#		return perf_outputs_dir + "/" + bench_name + "-" + str(nr_vcpus) + "vcpus.perf"
		

	def _write_perf_output_to_file(self, vm, vm_uuid, output):
		output_file = self._get_perf_output_filename(vm)
		with open(output_file, "w") as f:
			f.write(output)

	def _perf_thread_function(self, vm, vm_uuid, vm_pid):
		self.logger.info("Starting perf command for VM %s", vm_uuid)
		perf_cmd = ("ssh %(hostname)s perf kvm --guest stat -e %(events)s " + \
		           "-I %(interval)s -p %(pid)d sleep %(runtime)d") % \
		           {'hostname': self.hostname, 'events': perf_events,
		            'interval': perf_interval, 'pid': vm_pid,
		            'runtime': perf_window}
		self.logger.info("Running perf command for VM %s (PID: %d)", vm.id, vm_pid)
		status, output = commands.getstatusoutput(perf_cmd)
		if status != 0:
			self.logger.info("Something went wrong with the perf command: %s", output)
			return

		self.monitored_vms[vm]['last_perf_output'] = output

		self._write_perf_output_to_file(vm, vm_uuid, output)

	## vm: openstack VM object
	## returns pid: int, on error -1 is returned
	def _get_pid_from_vm_id(self, vm):
		try:
			libvirt_instance_name = getattr(vm, 'OS-EXT-SRV-ATTR:instance_name')
			command = "ssh %s ps -ef | grep %s | grep \"qemu-system\" | grep -v grep | awk '{print $2}'" % (self.hostname, libvirt_instance_name)
			pid = commands.getoutput(command)
			return int(pid)
		except Exception:
			return -1
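
The VM-naming convention that _get_model relies on (the benchmark name is rebuilt from dash-separated tokens 2 and 3) can be shown in isolation; the example name below is made up:

def bench_name_from_vm_name(vm_name):
    tokens = vm_name.split("-")
    if len(tokens) < 4:
        return None  ## no benchmark name, so no model lookup is possible
    return tokens[2] + "-" + tokens[3]

print(bench_name_from_vm_name("acticloud-gold-spec-sphinx3-2vcpus"))
## -> "spec-sphinx3"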
Example #11
	## Setup the logging facility
	logging.basicConfig(stream=sys.stdout, level=logging.INFO,
	                    format='%(asctime)s - %(name)20s - %(message)s',
	                    datefmt='%Y-%m-%d.%H:%M:%S')
	logging.Formatter.converter = time.gmtime
	logger = logging.getLogger("interference-detector")

	## Check and then read arguments
	if (len(sys.argv) < 2):
		logger.error("usage: %s <hostname>", sys.argv[0])
		sys.exit(1)
	hostname = sys.argv[1]

	## Initialize openstack client
	openstack_client = OpenstackClient()
	
	## Initialize Interference Detector
	detector = InterferenceDetector(hostname)

	while 1:
		## Delete all previously monitored VMs
		detector.remove_all_vms()

		## Add all the VMs to be monitored
		for vm in openstack_client.get_vms_by_hostname(hostname):
			if "acticloud" in vm.name:
				detector.add_vm(vm)

		## Start and wait for perf threads to finish
		detector.start_perf_threads()
		detector.stop_perf_threads()