Code Example #1
File: libvirt.py Project: jesseolsen/monasca-agent
    def check(self, instance):
        """Gather VM metrics for each instance"""

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {'vcpus': 'nova.vm.cpu.total_allocated',
                      'ram': 'nova.vm.mem.total_allocated_mb',
                      'disk': 'nova.vm.disk.total_allocated_gb'}
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst.isActive() == 0:
                self.log.info("{0} is not active -- skipping.".format(inst_name))
                continue
            if inst_name not in instance_cache:
                instance_cache = self._update_instance_cache()
            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info("Libvirt: {0} in probation for another {1} seconds".format(instance_cache.get(inst_name)['hostname'],
                                                                                         vm_probation_remaining))
                continue

            # Build customer dimensions
            dims_customer = dims_base.copy()
            dims_customer['resource_id'] = instance_cache.get(inst_name)['instance_uuid']
            dims_customer['zone'] = instance_cache.get(inst_name)['zone']
            # Add dimensions that would be helpful for operations
            dims_operations = dims_customer.copy()
            dims_operations['tenant_id'] = instance_cache.get(inst_name)['tenant_id']
            dims_operations['cloud_tier'] = 'overcloud'

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # CPU utilization percentage
            sample_time = float("{:9f}".format(time.time()))
            if 'cpu.time' in metric_cache[inst_name]:
                # I have a prior value, so calculate the rate & push the metric
                cpu_diff = insp.inspect_cpus(inst).time - metric_cache[inst_name]['cpu.time']['value']
                time_diff = sample_time - float(metric_cache[inst_name]['cpu.time']['timestamp'])
                # Convert time_diff to nanoseconds, and calculate percentage
                rate = (cpu_diff / (time_diff * 1000000000)) * 100

                self.gauge('cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_customer,
                           delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'])
                self.gauge('vm.cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_operations)

            metric_cache[inst_name]['cpu.time'] = {'timestamp': sample_time,
                                                   'value': insp.inspect_cpus(inst).time}

            # Disk utilization
            for disk in insp.inspect_disks(inst):
                sample_time = time.time()
                disk_dimensions = {'device': disk[0].device}
                for metric in disk[1]._fields:
                    metric_name = "io.{0}".format(metric)
                    if metric_name not in metric_cache[inst_name]:
                        metric_cache[inst_name][metric_name] = {}

                    value = int(disk[1].__getattribute__(metric))
                    if disk[0].device in metric_cache[inst_name][metric_name]:
                        time_diff = sample_time - metric_cache[inst_name][metric_name][disk[0].device]['timestamp']
                        val_diff = value - metric_cache[inst_name][metric_name][disk[0].device]['value']
                        # Change the metric name to a rate, i.e. "io.read_requests"
                        # gets converted to "io.read_ops_sec"
                        rate_name = "{0}_sec".format(metric_name.replace('requests', 'ops'))
                        # Customer
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        self.gauge(rate_name, val_diff, dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst_name][metric_name][disk[0].device] = {
                        'timestamp': sample_time,
                        'value': value}

            # Network utilization
            for vnic in insp.inspect_vnics(inst):
                sample_time = time.time()
                vnic_dimensions = {'device': vnic[0].name}
                for metric in vnic[1]._fields:
                    metric_name = "net.{0}".format(metric)
                    if metric_name not in metric_cache[inst_name]:
                        metric_cache[inst_name][metric_name] = {}

                    value = int(vnic[1].__getattribute__(metric))
                    if vnic[0].name in metric_cache[inst_name][metric_name]:
                        time_diff = sample_time - metric_cache[inst_name][metric_name][vnic[0].name]['timestamp']
                        val_diff = value - metric_cache[inst_name][metric_name][vnic[0].name]['value']
                        # Change the metric name to a rate, i.e. "net.rx_bytes"
                        # gets converted to "net.rx_bytes_sec"
                        rate_name = "{0}_sec".format(metric_name)
                        # Rename "tx" to "out" and "rx" to "in"
                        rate_name = rate_name.replace("tx", "out")
                        rate_name = rate_name.replace("rx", "in")
                        # Customer
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        self.gauge(rate_name, val_diff,
                                   dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst_name][metric_name][vnic[0].name] = {
                        'timestamp': sample_time,
                        'value': value}

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache)

        # Publish aggregate metrics
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge], agg_values[gauge], dimensions=dims_base)
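
The cpu.utilization_perc metric above is derived by comparing two samples of the guest's cumulative CPU time (in nanoseconds, per the conversion in the code) against the wall-clock time elapsed between samples. The following is a minimal sketch of just that arithmetic, with hypothetical names and no monasca-agent dependencies:

def cpu_utilization_perc(prev_cpu_ns, cur_cpu_ns, prev_ts, cur_ts):
    # Cumulative guest CPU time is in nanoseconds; the wall-clock timestamps
    # are in seconds, so convert the elapsed time to nanoseconds before dividing.
    cpu_diff = cur_cpu_ns - prev_cpu_ns
    time_diff_ns = (cur_ts - prev_ts) * 1000000000
    return (cpu_diff / time_diff_ns) * 100


# 15 s of CPU time consumed over 30 s of wall time -> 50.0
# (guests with more than one vCPU can legitimately exceed 100)
print(cpu_utilization_perc(0, 15 * 10**9, 1000.0, 1030.0))
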
Code Example #2
    def check(self, instance):
        """Gather VM metrics for each instance"""

        time_start = time.time()

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions(
            {
                'service': 'compute',
                'component': 'vm'
            }, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {
            'vcpus': 'nova.vm.cpu.total_allocated',
            'ram': 'nova.vm.mem.total_allocated_mb',
            'disk': 'nova.vm.disk.total_allocated_gb'
        }
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst_name not in instance_cache:
                instance_cache = self._update_instance_cache()

            # Build customer dimensions
            try:
                dims_customer = dims_base.copy()
                dims_customer['resource_id'] = instance_cache.get(
                    inst_name)['instance_uuid']
                dims_customer['zone'] = instance_cache.get(inst_name)['zone']
                # Add dimensions that would be helpful for operations
                dims_operations = dims_customer.copy()
                dims_operations['tenant_id'] = instance_cache.get(
                    inst_name)['tenant_id']
                if self.init_config.get('metadata'):
                    for metadata in self.init_config.get('metadata'):
                        metadata_value = (
                            instance_cache.get(inst_name).get(metadata))
                        if metadata_value:
                            dims_operations[metadata] = metadata_value
                # Remove the customer 'hostname' dimension; it will be replaced by the VM name
                del (dims_customer['hostname'])
            except TypeError:
                # Nova can potentially get into a state where it can't see an
                # instance, but libvirt can.  This would cause TypeErrors as
                # incomplete data is cached for this instance.  Log and skip.
                self.log.error(
                    "{0} is not known to nova after instance cache update -- skipping this ghost VM."
                    .format(inst_name))
                continue

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # Skip further processing on VMs that are not in an active state
            if self._inspect_state(insp, inst, instance_cache, dims_customer,
                                   dims_operations) != 0:
                continue

            # Skip the remainder of the checks if alive_only is True in the config
            if self.init_config.get('alive_only'):
                continue

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(
                instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info(
                    "Libvirt: {0} in probation for another {1} seconds".format(
                        instance_cache.get(inst_name)['hostname'].encode(
                            'utf8'), vm_probation_remaining))
                continue

            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            self._inspect_cpu(insp, inst, instance_cache, metric_cache,
                              dims_customer, dims_operations)
            self._inspect_disks(insp, inst, instance_cache, metric_cache,
                                dims_customer, dims_operations)
            self._inspect_network(insp, inst, instance_cache, metric_cache,
                                  dims_customer, dims_operations)

            # Memory utilization
            # (req. balloon driver; Linux kernel param CONFIG_VIRTIO_BALLOON)
            try:
                mem_metrics = {
                    'mem.free_mb':
                    float(inst.memoryStats()['unused']) / 1024,
                    'mem.swap_used_mb':
                    float(inst.memoryStats()['swap_out']) / 1024,
                    'mem.total_mb':
                    float(inst.memoryStats()['available'] -
                          inst.memoryStats()['unused']) / 1024,
                    'mem.used_mb':
                    float(inst.memoryStats()['available'] -
                          inst.memoryStats()['unused']) / 1024,
                    'mem.free_perc':
                    float(inst.memoryStats()['unused']) /
                    float(inst.memoryStats()['available']) * 100
                }
                for name in mem_metrics:
                    self.gauge(
                        name,
                        mem_metrics[name],
                        dimensions=dims_customer,
                        delegated_tenant=instance_cache.get(
                            inst_name)['tenant_id'],
                        hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge("vm.{0}".format(name),
                               mem_metrics[name],
                               dimensions=dims_operations)
            except KeyError:
                self.log.debug(
                    "Balloon driver not active/available on guest {0} ({1})".
                    format(inst_name,
                           instance_cache.get(inst_name)['hostname']))
            # Test instance's remote responsiveness (ping check) if possible
            if self.init_config.get(
                    'ping_check') and 'network' in instance_cache.get(
                        inst_name):
                for net in instance_cache.get(inst_name)['network']:

                    ping_cmd = self.init_config.get('ping_check').replace(
                        'NAMESPACE', net['namespace']).split()
                    ping_cmd.append(net['ip'])
                    dims_customer_ip = dims_customer.copy()
                    dims_operations_ip = dims_operations.copy()
                    dims_customer_ip['ip'] = net['ip']
                    dims_operations_ip['ip'] = net['ip']
                    with open(os.devnull, "w") as fnull:
                        try:
                            self.log.debug("Running ping test: {0}".format(
                                ' '.join(ping_cmd)))
                            res = subprocess.call(ping_cmd,
                                                  stdout=fnull,
                                                  stderr=fnull)
                            self.gauge('ping_status',
                                       res,
                                       dimensions=dims_customer_ip,
                                       delegated_tenant=instance_cache.get(
                                           inst_name)['tenant_id'],
                                       hostname=instance_cache.get(inst_name)
                                       ['hostname'])
                            self.gauge('vm.ping_status',
                                       res,
                                       dimensions=dims_operations_ip)
                        except OSError as e:
                            self.log.warn(
                                "OS error running '{0}' returned {1}".format(
                                    ping_cmd, e))

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache,
                                  math.ceil(time.time() - time_start))

        # Publish aggregate metrics
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge],
                       agg_values[gauge],
                       dimensions=dims_base)
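
The ping check in this example builds its command by substituting the instance's network namespace into a configured template and appending the instance's IP, then shells out with the output discarded. A standalone sketch of that construction, assuming a hypothetical template string (the real value comes from init_config['ping_check'] and typically needs root privileges and an existing namespace to succeed):

import os
import subprocess


def run_ping(ping_check_template, namespace, ip):
    # Substitute the namespace into the configured command template and append
    # the target IP, mirroring the construction in the check above.
    # A return code of 0 means the instance answered the ping.
    ping_cmd = ping_check_template.replace('NAMESPACE', namespace).split()
    ping_cmd.append(ip)
    with open(os.devnull, "w") as fnull:
        return subprocess.call(ping_cmd, stdout=fnull, stderr=fnull)


# Hypothetical template and namespace name, for illustration only.
print(run_ping("ip netns exec NAMESPACE ping -c 1 -W 1", "qrouter-1234", "10.0.0.5"))
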
Code Example #3
File: libvirt.py Project: remeq/monasca-agent
    def check(self, instance):
        """Gather VM metrics for each instance"""

        time_start = time.time()

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {'vcpus': 'nova.vm.cpu.total_allocated',
                      'ram': 'nova.vm.mem.total_allocated_mb',
                      'disk': 'nova.vm.disk.total_allocated_gb'}
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        updated_cache_this_time = False
        ping_results = []
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst_name not in instance_cache and not updated_cache_this_time:
                #
                # If we have multiple ghost VMs, we'll needlessly
                # update the instance cache.  Let's limit the cache
                # update to once per agent wakeup.
                #
                updated_cache_this_time = True
                instance_cache = self._update_instance_cache()

            # Build customer dimensions
            try:
                dims_customer = dims_base.copy()
                dims_customer['resource_id'] = instance_cache.get(inst_name)['instance_uuid']
                dims_customer['zone'] = instance_cache.get(inst_name)['zone']
                # Add dimensions that would be helpful for operations
                dims_operations = dims_customer.copy()
                dims_operations['tenant_id'] = instance_cache.get(inst_name)['tenant_id']
                dims_operations = self._update_dims_with_metadata(instance_cache, inst_name, dims_operations)
                if self.init_config.get('customer_metadata'):
                    for metadata in self.init_config.get('customer_metadata'):
                        metadata_value = (instance_cache.get(inst_name).
                                          get(metadata))
                        if metadata_value:
                            dims_customer[metadata] = metadata_value
                # Remove the customer 'hostname' dimension; it will be replaced by the VM name
                del(dims_customer['hostname'])
            except TypeError:
                # Nova can potentially get into a state where it can't see an
                # instance, but libvirt can.  This would cause TypeErrors as
                # incomplete data is cached for this instance.  Log and skip.
                self.log.error("{0} is not known to nova after instance cache update -- skipping this ghost VM.".format(inst_name))
                continue

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info("Libvirt: {0} in probation for another {1} seconds".format(instance_cache.get(inst_name)['hostname'].encode('utf8'),
                                                                                         vm_probation_remaining))
                continue

            # Skip further processing on VMs that are not in an active state
            if self._inspect_state(insp, inst, inst_name, instance_cache,
                                   dims_customer, dims_operations) != 0:
                continue

            # Skip the remainder of the checks if alive_only is True in the config
            if self.init_config.get('alive_only'):
                continue

            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            if self.init_config.get('vm_cpu_check_enable'):
                self._inspect_cpu(insp, inst, inst_name, instance_cache, metric_cache, dims_customer, dims_operations)
            if not self._collect_intervals['disk']['skip']:
                if self.init_config.get('vm_disks_check_enable'):
                    self._inspect_disks(insp, inst, inst_name, instance_cache, metric_cache, dims_customer,
                                        dims_operations)
                if self.init_config.get('vm_extended_disks_check_enable'):
                    self._inspect_disk_info(insp, inst, inst_name, instance_cache, metric_cache, dims_customer,
                                            dims_operations)

            if not self._collect_intervals['vnic']['skip']:
                if self.init_config.get('vm_network_check_enable'):
                    self._inspect_network(insp, inst, inst_name, instance_cache, metric_cache, dims_customer, dims_operations)

            # Memory utilization
            # (req. balloon driver; Linux kernel param CONFIG_VIRTIO_BALLOON)
            try:
                mem_stats = inst.memoryStats()
                mem_metrics = {'mem.free_mb': float(mem_stats['unused']) / 1024,
                               'mem.swap_used_mb': float(mem_stats['swap_out']) / 1024,
                               'mem.total_mb': float(mem_stats['available']) / 1024,
                               'mem.used_mb': float(mem_stats['available'] - mem_stats['unused']) / 1024,
                               'mem.free_perc': float(mem_stats['unused']) / float(mem_stats['available']) * 100}
                for name in mem_metrics:
                    self.gauge(name, mem_metrics[name], dimensions=dims_customer,
                               delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                               hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge("vm.{0}".format(name), mem_metrics[name],
                               dimensions=dims_operations)
                memory_info = insp.inspect_memory_resident(inst)
                self.gauge('vm.mem.resident_mb', float(memory_info.resident), dimensions=dims_operations)
            except KeyError:
                self.log.debug("Balloon driver not active/available on guest {0} ({1})".format(inst_name,
                                                                                               instance_cache.get(inst_name)['hostname']))
            # Test instance's remote responsiveness (ping check) if possible
            if (self.init_config.get('vm_ping_check_enable')) and self.init_config.get('ping_check') and 'network' in instance_cache.get(inst_name):
                for net in instance_cache.get(inst_name)['network']:
                    ping_args = [dims_customer, dims_operations, inst_name, instance_cache, net]
                    ping_results.append(self.pool.apply_async(self._run_ping, ping_args))

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache, math.ceil(time.time() - time_start))

        # Publish aggregate metrics
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge], agg_values[gauge], dimensions=dims_base)

        # Check results of ping tests
        self._check_ping_results(ping_results)
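
Unlike example #2, this variant does not block on each ping: it dispatches every check through a worker pool with apply_async and only inspects the results after the main loop. The agent's own pool, _run_ping and _check_ping_results are not shown here, so the sketch below reproduces the dispatch-then-collect pattern with a thread pool and a hypothetical ping_host helper:

import os
import subprocess
from multiprocessing.dummy import Pool  # thread-backed Pool; the agent uses its own pool


def ping_host(ip):
    # Hypothetical stand-in for the agent's _run_ping helper.
    with open(os.devnull, "w") as fnull:
        return ip, subprocess.call(["ping", "-c", "1", "-W", "1", ip],
                                   stdout=fnull, stderr=fnull)


pool = Pool(4)
ping_results = []
for ip in ("127.0.0.1", "10.255.255.1"):
    # Dispatch each ping without blocking the collection loop...
    ping_results.append(pool.apply_async(ping_host, (ip,)))

# ...then collect the outcomes once everything else has been gathered,
# analogous to _check_ping_results() at the end of check().
for result in ping_results:
    ip, status = result.get()
    print(ip, "reachable" if status == 0 else "unreachable")
pool.close()
pool.join()
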
Code Example #4
File: libvirt.py Project: jobrs/monasca-agent
    def check(self, instance):
        """Gather VM metrics for each instance"""

        time_start = time.time()

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {'vcpus': 'nova.vm.cpu.total_allocated',
                      'ram': 'nova.vm.mem.total_allocated_mb',
                      'disk': 'nova.vm.disk.total_allocated_gb'}
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        updated_cache_this_time = False
        ping_results = []
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst_name not in instance_cache and not updated_cache_this_time:
                #
                # If we have multiple ghost VMs, we'll needlessly
                # update the instance cache.  Let's limit the cache
                # update to once per agent wakeup.
                #
                updated_cache_this_time = True
                instance_cache = self._update_instance_cache()

            # Build customer dimensions
            try:
                dims_customer = dims_base.copy()
                dims_customer['resource_id'] = instance_cache.get(inst_name)['instance_uuid']
                dims_customer['zone'] = instance_cache.get(inst_name)['zone']
                # Add dimensions that would be helpful for operations
                dims_operations = dims_customer.copy()
                dims_operations['tenant_id'] = instance_cache.get(inst_name)['tenant_id']
                dims_operations = self._update_dims_with_metadata(instance_cache, inst_name, dims_operations)
                if self.init_config.get('customer_metadata'):
                    for metadata in self.init_config.get('customer_metadata'):
                        metadata_value = (instance_cache.get(inst_name).
                                          get(metadata))
                        if metadata_value:
                            dims_customer[metadata] = metadata_value
                # Remove the customer 'hostname' dimension; it will be replaced by the VM name
                del(dims_customer['hostname'])
            except TypeError:
                # Nova can potentially get into a state where it can't see an
                # instance, but libvirt can.  This would cause TypeErrors as
                # incomplete data is cached for this instance.  Log and skip.
                self.log.error("{0} is not known to nova after instance cache update -- skipping this ghost VM.".format(inst_name))
                continue

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info("Libvirt: {0} in probation for another {1} seconds".format(instance_cache.get(inst_name)['hostname'].encode('utf8'),
                                                                                         vm_probation_remaining))
                continue

            # Skip further processing on VMs that are not in an active state
            if self._inspect_state(insp, inst, inst_name, instance_cache,
                                   dims_customer, dims_operations) != 0:
                continue

            # Skip the remainder of the checks if alive_only is True in the config
            if self.init_config.get('alive_only'):
                continue

            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            if self.init_config.get('vm_cpu_check_enable'):
                self._inspect_cpu(insp, inst, inst_name, instance_cache, metric_cache, dims_customer, dims_operations)
            if not self._collect_intervals['disk']['skip']:
                if self.init_config.get('vm_disks_check_enable'):
                    self._inspect_disks(insp, inst, inst_name, instance_cache, metric_cache, dims_customer,
                                        dims_operations)
                if self.init_config.get('vm_extended_disks_check_enable'):
                    self._inspect_disk_info(insp, inst, inst_name, instance_cache, metric_cache, dims_customer,
                                            dims_operations)

            if not self._collect_intervals['vnic']['skip']:
                if self.init_config.get('vm_network_check_enable'):
                    self._inspect_network(insp, inst, inst_name, instance_cache, metric_cache, dims_customer, dims_operations)

            # Memory utilization
            # (req. balloon driver; Linux kernel param CONFIG_VIRTIO_BALLOON)
            try:
                mem_stats = inst.memoryStats()
                mem_metrics = {'mem.free_mb': float(mem_stats['unused']) / 1024,
                               'mem.swap_used_mb': float(mem_stats['swap_out']) / 1024,
                               'mem.total_mb': float(mem_stats['available']) / 1024,
                               'mem.used_mb': float(mem_stats['available'] - mem_stats['unused']) / 1024,
                               'mem.free_perc': float(mem_stats['unused']) / float(mem_stats['available']) * 100}
                for name in mem_metrics:
                    self.gauge(name, mem_metrics[name], dimensions=dims_customer,
                               delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                               hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge("vm.{0}".format(name), mem_metrics[name],
                               dimensions=dims_operations)
                memory_info = insp.inspect_memory_resident(inst)
                self.gauge('vm.mem.resident_mb', float(memory_info.resident), dimensions=dims_operations)
            except KeyError:
                self.log.debug("Balloon driver not active/available on guest {0} ({1})".format(inst_name,
                                                                                               instance_cache.get(inst_name)['hostname']))
            # Test instance's remote responsiveness (ping check) if possible
            if (self.init_config.get('vm_ping_check_enable')) and self.init_config.get('ping_check') and 'network' in instance_cache.get(inst_name):
                for net in instance_cache.get(inst_name)['network']:
                    ping_args = [dims_customer, dims_operations, inst_name, instance_cache, net]
                    ping_results.append(self.pool.apply_async(self._run_ping, ping_args))

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache, math.ceil(time.time() - time_start))

        # Publish aggregate metrics
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge], agg_values[gauge], dimensions=dims_base)

        # Check results of ping tests
        self._check_ping_results(ping_results)
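
This example and #3 both enrich the dimension maps from configured metadata lists: example #2 shows the inline loop that copies keys named in init_config['metadata'] from the cached nova data into the operations dimensions, and #3/#4 do the same for init_config['customer_metadata'] into the customer dimensions (the operations side is hidden behind _update_dims_with_metadata). A small sketch of that lookup, using a plain dict in place of the instance cache entry and a hypothetical helper name:

def enrich_dims(dims, cache_entry, metadata_keys):
    # Hypothetical helper mirroring the loops above: copy any configured
    # metadata keys that exist in the cached nova data into a dimensions dict.
    enriched = dims.copy()
    for key in metadata_keys or []:
        value = cache_entry.get(key)
        if value:
            enriched[key] = value
    return enriched


cache_entry = {'tenant_id': 'abc123', 'scale_group': 'web', 'hostname': 'vm-01'}
print(enrich_dims({'service': 'compute'}, cache_entry, ['scale_group', 'missing_key']))
# -> {'service': 'compute', 'scale_group': 'web'}
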
Code Example #5
File: libvirt.py Project: heekof/monasca-agent
    def check(self, instance):
        """Gather VM metrics for each instance"""

        time_start = time.time()

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {'vcpus': 'nova.vm.cpu.total_allocated',
                      'ram': 'nova.vm.mem.total_allocated_mb',
                      'disk': 'nova.vm.disk.total_allocated_gb'}
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst_name not in instance_cache:
                instance_cache = self._update_instance_cache()

            # Build customer dimensions
            try:
                dims_customer = dims_base.copy()
                dims_customer['resource_id'] = instance_cache.get(inst_name)['instance_uuid']
                dims_customer['zone'] = instance_cache.get(inst_name)['zone']
                # Add dimensions that would be helpful for operations
                dims_operations = dims_customer.copy()
                dims_operations['tenant_id'] = instance_cache.get(inst_name)['tenant_id']
                if self.init_config.get('metadata'):
                    for metadata in self.init_config.get('metadata'):
                        metadata_value = (instance_cache.get(inst_name).
                                          get(metadata))
                        if metadata_value:
                            dims_operations[metadata] = metadata_value
                # Remove the customer 'hostname' dimension; it will be replaced by the VM name
                del(dims_customer['hostname'])
            except TypeError:
                # Nova can potentially get into a state where it can't see an
                # instance, but libvirt can.  This would cause TypeErrors as
                # incomplete data is cached for this instance.  Log and skip.
                self.log.error("{0} is not known to nova after instance cache update -- skipping this ghost VM.".format(inst_name))
                continue

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # Skip further processing on VMs that are not in an active state
            if self._inspect_state(insp, inst, instance_cache,
                                   dims_customer, dims_operations) != 0:
                continue

            # Skip the remainder of the checks if alive_only is True in the config
            if self.init_config.get('alive_only'):
                continue

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info("Libvirt: {0} in probation for another {1} seconds".format(instance_cache.get(inst_name)['hostname'].encode('utf8'),
                                                                                         vm_probation_remaining))
                continue

            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            self._inspect_cpu(insp, inst, instance_cache, metric_cache, dims_customer, dims_operations)
            self._inspect_disks(insp, inst, instance_cache, metric_cache, dims_customer, dims_operations)
            self._inspect_network(insp, inst, instance_cache, metric_cache, dims_customer, dims_operations)

            # Memory utilization
            # (req. balloon driver; Linux kernel param CONFIG_VIRTIO_BALLOON)
            try:
                mem_metrics = {'mem.free_mb': float(inst.memoryStats()['unused']) / 1024,
                               'mem.swap_used_mb': float(inst.memoryStats()['swap_out']) / 1024,
                               'mem.total_mb': float(inst.memoryStats()['available'] - inst.memoryStats()['unused']) / 1024,
                               'mem.used_mb': float(inst.memoryStats()['available'] - inst.memoryStats()['unused']) / 1024,
                               'mem.free_perc': float(inst.memoryStats()['unused']) / float(inst.memoryStats()['available']) * 100}
                for name in mem_metrics:
                    self.gauge(name, mem_metrics[name], dimensions=dims_customer,
                               delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                               hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge("vm.{0}".format(name), mem_metrics[name],
                               dimensions=dims_operations)
            except KeyError:
                self.log.debug("Balloon driver not active/available on guest {0} ({1})".format(inst_name,
                                                                                               instance_cache.get(inst_name)['hostname']))
            # Test instance's remote responsiveness (ping check) if possible
            if self.init_config.get('ping_check') and 'network' in instance_cache.get(inst_name):
                for net in instance_cache.get(inst_name)['network']:

                    ping_cmd = self.init_config.get('ping_check').replace('NAMESPACE',
                                                                          net['namespace']).split()
                    ping_cmd.append(net['ip'])
                    dims_customer_ip = dims_customer.copy()
                    dims_operations_ip = dims_operations.copy()
                    dims_customer_ip['ip'] = net['ip']
                    dims_operations_ip['ip'] = net['ip']
                    with open(os.devnull, "w") as fnull:
                        try:
                            self.log.debug("Running ping test: {0}".format(' '.join(ping_cmd)))
                            res = subprocess.call(ping_cmd,
                                                  stdout=fnull,
                                                  stderr=fnull)
                            self.gauge('ping_status', res, dimensions=dims_customer_ip,
                                       delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                       hostname=instance_cache.get(inst_name)['hostname'])
                            self.gauge('vm.ping_status', res, dimensions=dims_operations_ip)
                        except OSError as e:
                            self.log.warn("OS error running '{0}' returned {1}".format(ping_cmd, e))

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache, math.ceil(time.time() - time_start))

        # Publish aggregate metrics
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge], agg_values[gauge], dimensions=dims_base)
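
The memory figures come from libvirt's memoryStats(), which only includes keys such as 'unused' and 'available' when the virtio balloon driver is active in the guest; that is why a KeyError simply downgrades to a debug log. The conversion itself can be sketched without libvirt (the counters are kibibytes). Note that this example, like #2 and #6, reports mem.total_mb as available minus unused, i.e. the same value as mem.used_mb, while examples #3 and #4 report available directly; the sketch below follows the latter:

def mem_metrics_from_stats(mem_stats):
    # mem_stats mirrors the dict returned by a libvirt domain's memoryStats():
    # kibibyte counters such as 'unused', 'available' and 'swap_out'.  Missing
    # keys raise KeyError, which the checks above treat as "no balloon driver".
    return {
        'mem.free_mb': float(mem_stats['unused']) / 1024,
        'mem.swap_used_mb': float(mem_stats['swap_out']) / 1024,
        'mem.used_mb': float(mem_stats['available'] - mem_stats['unused']) / 1024,
        'mem.total_mb': float(mem_stats['available']) / 1024,
        'mem.free_perc': float(mem_stats['unused']) / float(mem_stats['available']) * 100,
    }


# 4 GiB available, 1 GiB unused -> 3072 MB used, 4096 MB total, 25% free
print(mem_metrics_from_stats({'unused': 1048576, 'available': 4194304, 'swap_out': 0}))
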
Code Example #6
File: libvirt.py Project: pradeep-av/monasca-agent
    def check(self, instance):
        """Gather VM metrics for each instance"""

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {'vcpus': 'nova.vm.cpu.total_allocated',
                      'ram': 'nova.vm.mem.total_allocated_mb',
                      'disk': 'nova.vm.disk.total_allocated_gb'}
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst_name not in instance_cache:
                instance_cache = self._update_instance_cache()

            # Build customer dimensions
            try:
                dims_customer = dims_base.copy()
                dims_customer['resource_id'] = instance_cache.get(inst_name)['instance_uuid']
                dims_customer['zone'] = instance_cache.get(inst_name)['zone']
                # Add dimensions that would be helpful for operations
                dims_operations = dims_customer.copy()
                dims_operations['tenant_id'] = instance_cache.get(inst_name)['tenant_id']
                # Remove the customer 'hostname' dimension; it will be replaced by the VM name
                del(dims_customer['hostname'])
            except TypeError:
                # Nova can potentially get into a state where it can't see an
                # instance, but libvirt can.  This would cause TypeErrors as
                # incomplete data is cached for this instance.  Log and skip.
                self.log.error("{0} is not known to nova after instance cache update -- skipping this ghost VM.".format(inst_name))
                continue

            # Skip instances that are inactive
            if inst.isActive() == 0:
                detail = 'Instance is not active'
                self.gauge('host_alive_status', 2, dimensions=dims_customer,
                           delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'],
                           value_meta={'detail': detail})
                self.gauge('vm.host_alive_status', 2, dimensions=dims_operations,
                           value_meta={'detail': detail})
                continue
            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info("Libvirt: {0} in probation for another {1} seconds".format(instance_cache.get(inst_name)['hostname'].encode('utf8'),
                                                                                         vm_probation_remaining))
                continue

            # Test instance's general responsiveness (ping check) if so configured
            if self.init_config.get('ping_check') and 'private_ip' in instance_cache.get(inst_name):
                detail = 'Ping check OK'
                ping_cmd = self.init_config.get('ping_check').split()
                ping_cmd.append(instance_cache.get(inst_name)['private_ip'])
                with open(os.devnull, "w") as fnull:
                    try:
                        res = subprocess.call(ping_cmd,
                                              stdout=fnull,
                                              stderr=fnull)
                        if res > 0:
                            detail = 'Host failed ping check'
                        self.gauge('host_alive_status', res, dimensions=dims_customer,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'],
                                   value_meta={'detail': detail})
                        self.gauge('vm.host_alive_status', res, dimensions=dims_operations,
                                   value_meta={'detail': detail})
                        # Do not attempt to process any more metrics for offline hosts
                        if res > 0:
                            continue
                    except OSError as e:
                        self.log.warn("OS error running '{0}' returned {1}".format(ping_cmd, e))

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # CPU utilization percentage
            sample_time = float("{:9f}".format(time.time()))
            if 'cpu.time' in metric_cache[inst_name]:
                # I have a prior value, so calculate the rate & push the metric
                cpu_diff = insp.inspect_cpus(inst).time - metric_cache[inst_name]['cpu.time']['value']
                time_diff = sample_time - float(metric_cache[inst_name]['cpu.time']['timestamp'])
                # Convert time_diff to nanoseconds, and calculate percentage
                rate = (cpu_diff / (time_diff * 1000000000)) * 100

                self.gauge('cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_customer,
                           delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'])
                self.gauge('vm.cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_operations)

            metric_cache[inst_name]['cpu.time'] = {'timestamp': sample_time,
                                                   'value': insp.inspect_cpus(inst).time}

            # Disk activity
            for disk in insp.inspect_disks(inst):
                sample_time = time.time()
                disk_dimensions = {'device': disk[0].device}
                for metric in disk[1]._fields:
                    metric_name = "io.{0}".format(metric)
                    if metric_name not in metric_cache[inst_name]:
                        metric_cache[inst_name][metric_name] = {}

                    value = int(disk[1].__getattribute__(metric))
                    if disk[0].device in metric_cache[inst_name][metric_name]:
                        time_diff = sample_time - metric_cache[inst_name][metric_name][disk[0].device]['timestamp']
                        val_diff = value - metric_cache[inst_name][metric_name][disk[0].device]['value']
                        # Change the metric name to a rate, i.e. "io.read_requests"
                        # gets converted to "io.read_ops_sec"
                        rate_name = "{0}_sec".format(metric_name.replace('requests', 'ops'))
                        # Customer
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        self.gauge(rate_name, val_diff, dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst_name][metric_name][disk[0].device] = {
                        'timestamp': sample_time,
                        'value': value}

            # Memory utilization
            # (req. balloon driver; Linux kernel param CONFIG_VIRTIO_BALLOON)
            try:
                mem_metrics = {'mem.free_mb': float(inst.memoryStats()['unused']) / 1024,
                               'mem.swap_used_mb': float(inst.memoryStats()['swap_out']) / 1024,
                               'mem.total_mb': float(inst.memoryStats()['available'] - inst.memoryStats()['unused']) / 1024,
                               'mem.used_mb': float(inst.memoryStats()['available'] - inst.memoryStats()['unused']) / 1024,
                               'mem.free_perc': float(inst.memoryStats()['unused']) / float(inst.memoryStats()['available']) * 100}
                for name in mem_metrics:
                    self.gauge(name, mem_metrics[name], dimensions=dims_customer,
                               delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                               hostname=instance_cache.get(inst_name)['hostname'])
                    self.gauge("vm.{0}".format(name), mem_metrics[name],
                               dimensions=dims_operations)
            except KeyError:
                self.log.debug("Balloon driver not active/available on guest {0} ({1})".format(inst_name,
                                                                                               instance_cache.get(inst_name)['hostname']))

            # Network activity
            for vnic in insp.inspect_vnics(inst):
                sample_time = time.time()
                vnic_dimensions = {'device': vnic[0].name}
                for metric in vnic[1]._fields:
                    metric_name = "net.{0}".format(metric)
                    if metric_name not in metric_cache[inst_name]:
                        metric_cache[inst_name][metric_name] = {}

                    value = int(vnic[1].__getattribute__(metric))
                    if vnic[0].name in metric_cache[inst_name][metric_name]:
                        time_diff = sample_time - metric_cache[inst_name][metric_name][vnic[0].name]['timestamp']
                        val_diff = value - metric_cache[inst_name][metric_name][vnic[0].name]['value']
                        # Change the metric name to a rate, i.e. "net.rx_bytes"
                        # gets converted to "net.rx_bytes_sec"
                        rate_name = "{0}_sec".format(metric_name)
                        # Rename "tx" to "out" and "rx" to "in"
                        rate_name = rate_name.replace("tx", "out")
                        rate_name = rate_name.replace("rx", "in")
                        # Customer
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        self.gauge(rate_name, val_diff,
                                   dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst_name][metric_name][vnic[0].name] = {
                        'timestamp': sample_time,
                        'value': value}

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache)

        # Publish aggregate metrics
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge], agg_values[gauge], dimensions=dims_base)
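
The disk and network loops here (and in examples #1 and #7) share one caching pattern: each cumulative counter is stored per device under metric_cache[instance][metric][device] together with its timestamp, and nothing is published until a previous sample exists. Note that the loops publish the raw difference val_diff even though time_diff is computed; the hypothetical helper below shows the same cache layout with an explicit per-second division:

import time


def update_and_rate(inst_metrics, metric_name, device, value, sample_time):
    # Hypothetical helper: store the new sample in the same per-instance layout
    # the examples use (metric -> device -> {'timestamp', 'value'}) and return
    # a per-second rate, or None the first time a device is seen.
    per_device = inst_metrics.setdefault(metric_name, {})
    prev = per_device.get(device)
    per_device[device] = {'timestamp': sample_time, 'value': value}
    if prev is None or sample_time <= prev['timestamp']:
        return None
    return (value - prev['value']) / (sample_time - prev['timestamp'])


cache = {}
print(update_and_rate(cache, 'io.read_requests', 'vda', 1000, time.time()))  # None, first sample
time.sleep(1)
print(update_and_rate(cache, 'io.read_requests', 'vda', 1600, time.time()))  # ~600 ops/sec
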
Code Example #7
File: libvirt.py Project: whiteear/monasca-agent
    def check(self, instance):
        """Gather VM metrics for each instance"""

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)

        insp = inspector.get_hypervisor_inspector()
        for inst in insp.inspect_instances():
            # Verify that this instance exists in the cache.  Add if necessary.
            if inst.name not in instance_cache:
                instance_cache = self._update_instance_cache()
            if inst.name not in metric_cache:
                metric_cache[inst.name] = {}

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(instance_cache.get(inst.name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info("Libvirt: {0} in probation for another {1} seconds".format(instance_cache.get(inst.name)['hostname'],
                                                                                         vm_probation_remaining))
                continue

            # Build customer dimensions
            dims_customer = dims_base.copy()
            dims_customer['resource_id'] = instance_cache.get(inst.name)['instance_uuid']
            dims_customer['zone'] = instance_cache.get(inst.name)['zone']
            # Add dimensions that would be helpful for operations
            dims_operations = dims_customer.copy()
            dims_operations['tenant_id'] = instance_cache.get(inst.name)['tenant_id']
            dims_operations['cloud_tier'] = 'overcloud'

            # CPU utilization percentage
            sample_time = float("{:9f}".format(time.time()))
            if 'cpu.time' in metric_cache[inst.name]:
                # I have a prior value, so calculate the rate & push the metric
                cpu_diff = insp.inspect_cpus(inst.name).time - metric_cache[inst.name]['cpu.time']['value']
                time_diff = sample_time - float(metric_cache[inst.name]['cpu.time']['timestamp'])
                # Convert time_diff to nanoseconds, and calculate percentage
                rate = (cpu_diff / (time_diff * 1000000000)) * 100

                self.gauge('cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_customer,
                           delegated_tenant=instance_cache.get(inst.name)['tenant_id'],
                           hostname=instance_cache.get(inst.name)['hostname'])
                self.gauge('vm.cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_operations)

            metric_cache[inst.name]['cpu.time'] = {'timestamp': sample_time,
                                                   'value': insp.inspect_cpus(inst.name).time}

            # Disk utilization
            for disk in insp.inspect_disks(inst.name):
                sample_time = time.time()
                disk_dimensions = {'device': disk[0].device}
                for metric in disk[1]._fields:
                    metric_name = "io.{0}".format(metric)
                    if metric_name not in metric_cache[inst.name]:
                        metric_cache[inst.name][metric_name] = {}

                    value = int(disk[1].__getattribute__(metric))
                    if disk[0].device in metric_cache[inst.name][metric_name]:
                        time_diff = sample_time - metric_cache[inst.name][metric_name][disk[0].device]['timestamp']
                        val_diff = value - metric_cache[inst.name][metric_name][disk[0].device]['value']
                        # Change the metric name to a rate, i.e. "io.read_requests"
                        # gets converted to "io.read_ops_sec"
                        rate_name = "{0}_sec".format(metric_name.replace('requests', 'ops'))
                        # Customer
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        self.gauge(rate_name, val_diff, dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst.name)['tenant_id'],
                                   hostname=instance_cache.get(inst.name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst.name][metric_name][disk[0].device] = {
                        'timestamp': sample_time,
                        'value': value}

            # Network utilization
            for vnic in insp.inspect_vnics(inst.name):
                sample_time = time.time()
                vnic_dimensions = {'device': vnic[0].name}
                for metric in vnic[1]._fields:
                    metric_name = "net.{0}".format(metric)
                    if metric_name not in metric_cache[inst.name]:
                        metric_cache[inst.name][metric_name] = {}

                    value = int(vnic[1].__getattribute__(metric))
                    if vnic[0].name in metric_cache[inst.name][metric_name]:
                        time_diff = sample_time - metric_cache[inst.name][metric_name][vnic[0].name]['timestamp']
                        val_diff = value - metric_cache[inst.name][metric_name][vnic[0].name]['value']
                        # Change the metric name to a rate, e.g. "net.rx_bytes"
                        # gets converted to "net.rx_bytes_sec"
                        rate_name = "{0}_sec".format(metric_name)
                        # Rename "tx" to "out" and "rx" to "in"
                        rate_name = rate_name.replace("tx", "out")
                        rate_name = rate_name.replace("rx", "in")
                        # Customer
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        self.gauge(rate_name, val_diff / time_diff,
                                   dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst.name)['tenant_id'],
                                   hostname=instance_cache.get(inst.name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff / time_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst.name][metric_name][vnic[0].name] = {
                        'timestamp': sample_time,
                        'value': value}

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache)
Code Example #8
    def check(self, instance):
        """Gather VM metrics for each instance"""

        # Load metric cache
        metric_cache = self._load_metric_cache()

        # Load the nova-obtained instance data cache
        instance_cache = self._load_instance_cache()

        # Build dimensions for both the customer and for operations
        dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)

        # Define aggregate gauges, gauge name to metric name
        agg_gauges = {'vcpus': 'nova.vm.cpu.total_allocated',
                      'ram': 'nova.vm.mem.total_allocated_mb',
                      'disk': 'nova.vm.disk.total_allocated_gb'}
        agg_values = {}
        for gauge in agg_gauges.keys():
            agg_values[gauge] = 0

        insp = inspector.get_hypervisor_inspector()
        for inst in insp._get_connection().listAllDomains():
            # Verify that this instance exists in the cache.  Add if necessary.
            inst_name = inst.name()
            if inst_name not in instance_cache:
                instance_cache = self._update_instance_cache()

            # Build customer dimensions
            try:
                dims_customer = dims_base.copy()
                dims_customer['resource_id'] = instance_cache.get(inst_name)['instance_uuid']
                dims_customer['zone'] = instance_cache.get(inst_name)['zone']
                # Add dimensions that would be helpful for operations
                dims_operations = dims_customer.copy()
                dims_operations['tenant_id'] = instance_cache.get(inst_name)['tenant_id']
                # Remove customer 'hostname' dimension, this will be replaced by the VM name
                del dims_customer['hostname']
            except TypeError:
                # Nova can potentially get into a state where it can't see an
                # instance, but libvirt can.  This would cause TypeErrors as
                # incomplete data is cached for this instance.  Log and skip.
                self.log.error("{0} is not known to nova after instance cache update -- skipping this ghost VM.".format(inst_name))
                continue

            # Skip instances that are inactive
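            # and report a host_alive_status of 2 ('Instance is not active')
            # for both the customer and operations views before moving on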
            if inst.isActive() == 0:
                detail = 'Instance is not active'
                self.gauge('host_alive_status', 2, dimensions=dims_customer,
                           delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'],
                           value_meta={'detail': detail})
                self.gauge('vm.host_alive_status', 2, dimensions=dims_operations,
                           value_meta={'detail': detail})
                continue
            if inst_name not in metric_cache:
                metric_cache[inst_name] = {}

            # Skip instances created within the probation period
            vm_probation_remaining = self._test_vm_probation(instance_cache.get(inst_name)['created'])
            if (vm_probation_remaining >= 0):
                self.log.info("Libvirt: {0} in probation for another {1} seconds".format(instance_cache.get(inst_name)['hostname'],
                                                                                         vm_probation_remaining))
                continue

            # Test instance's general responsiveness (ping check) if so configured
            if self.init_config.get('ping_check') and 'private_ip' in instance_cache.get(inst_name):
                detail = 'Ping check OK'
                ping_cmd = self.init_config.get('ping_check').split()
                ping_cmd.append(instance_cache.get(inst_name)['private_ip'])
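                # ping_check supplies the base ping command (e.g. "fping -n -c1"
                # -- illustrative value only); the VM's private IP is appended
                # as the target and the exit code is reported as the status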
                with open(os.devnull, "w") as fnull:
                    try:
                        res = subprocess.call(ping_cmd,
                                              stdout=fnull,
                                              stderr=fnull)
                        if res > 0:
                            detail = 'Host failed ping check'
                        self.gauge('host_alive_status', res, dimensions=dims_customer,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'],
                                   value_meta={'detail': detail})
                        self.gauge('vm.host_alive_status', res, dimensions=dims_operations,
                                   value_meta={'detail': detail})
                        # Do not attempt to process any more metrics for offline hosts
                        if res > 0:
                            continue
                    except OSError as e:
                        self.log.warn("OS error running '{0}' returned {1}".format(ping_cmd, e))

            # Accumulate aggregate data
            for gauge in agg_gauges:
                if gauge in instance_cache.get(inst_name):
                    agg_values[gauge] += instance_cache.get(inst_name)[gauge]

            # CPU utilization percentage
            sample_time = float("{:9f}".format(time.time()))
            if 'cpu.time' in metric_cache[inst_name]:
                # I have a prior value, so calculate the rate & push the metric
                cpu_diff = insp.inspect_cpus(inst).time - metric_cache[inst_name]['cpu.time']['value']
                time_diff = sample_time - float(metric_cache[inst_name]['cpu.time']['timestamp'])
                # Convert time_diff to nanoseconds, and calculate percentage
                rate = (cpu_diff / (time_diff * 1000000000)) * 100
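                # e.g. 5e9 ns of guest CPU time over a 10 s sampling interval:
                # (5e9 / (10 * 1e9)) * 100 = 50% utilization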

                self.gauge('cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_customer,
                           delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                           hostname=instance_cache.get(inst_name)['hostname'])
                self.gauge('vm.cpu.utilization_perc', int(round(rate, 0)),
                           dimensions=dims_operations)

            metric_cache[inst_name]['cpu.time'] = {'timestamp': sample_time,
                                                   'value': insp.inspect_cpus(inst).time}

            # Disk activity
            for disk in insp.inspect_disks(inst):
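                # Each item is a (device, stats) pair from the inspector:
                # disk[0].device names the block device and disk[1] holds the
                # cumulative read/write request and byte counters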
                sample_time = time.time()
                disk_dimensions = {'device': disk[0].device}
                for metric in disk[1]._fields:
                    metric_name = "io.{0}".format(metric)
                    if metric_name not in metric_cache[inst_name]:
                        metric_cache[inst_name][metric_name] = {}

                    value = int(disk[1].__getattribute__(metric))
                    if disk[0].device in metric_cache[inst_name][metric_name]:
                        time_diff = sample_time - metric_cache[inst_name][metric_name][disk[0].device]['timestamp']
                        val_diff = value - metric_cache[inst_name][metric_name][disk[0].device]['value']
                        # Change the metric name to a rate, e.g. "io.read_requests"
                        # gets converted to "io.read_ops_sec"
                        rate_name = "{0}_sec".format(metric_name.replace('requests', 'ops'))
                        # Customer
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        # Divide the counter delta by the elapsed seconds so the
                        # "*_sec" metric is a true per-second rate
                        self.gauge(rate_name, val_diff / time_diff,
                                   dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = disk_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff / time_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst_name][metric_name][disk[0].device] = {
                        'timestamp': sample_time,
                        'value': value}

            # Disk utilization
            # TODO(dschroeder)

            # Memory utilization
            # TODO(dschroeder)

            # Network activity
            for vnic in insp.inspect_vnics(inst):
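                # Each item is an (interface, stats) pair from the inspector:
                # vnic[0].name names the virtual NIC and vnic[1] holds the
                # cumulative rx/tx byte and packet counters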
                sample_time = time.time()
                vnic_dimensions = {'device': vnic[0].name}
                for metric in vnic[1]._fields:
                    metric_name = "net.{0}".format(metric)
                    if metric_name not in metric_cache[inst_name]:
                        metric_cache[inst_name][metric_name] = {}

                    value = int(vnic[1].__getattribute__(metric))
                    if vnic[0].name in metric_cache[inst_name][metric_name]:
                        time_diff = sample_time - metric_cache[inst_name][metric_name][vnic[0].name]['timestamp']
                        val_diff = value - metric_cache[inst_name][metric_name][vnic[0].name]['value']
                        # Change the metric name to a rate, e.g. "net.rx_bytes"
                        # gets converted to "net.rx_bytes_sec"
                        rate_name = "{0}_sec".format(metric_name)
                        # Rename "tx" to "out" and "rx" to "in"
                        rate_name = rate_name.replace("tx", "out")
                        rate_name = rate_name.replace("rx", "in")
                        # Customer
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_customer)
                        self.gauge(rate_name, val_diff / time_diff,
                                   dimensions=this_dimensions,
                                   delegated_tenant=instance_cache.get(inst_name)['tenant_id'],
                                   hostname=instance_cache.get(inst_name)['hostname'])
                        # Operations (metric name prefixed with "vm.")
                        this_dimensions = vnic_dimensions.copy()
                        this_dimensions.update(dims_operations)
                        self.gauge("vm.{0}".format(rate_name), val_diff / time_diff,
                                   dimensions=this_dimensions)
                    # Save this metric to the cache
                    metric_cache[inst_name][metric_name][vnic[0].name] = {
                        'timestamp': sample_time,
                        'value': value}

        # Save these metrics for the next collector invocation
        self._update_metric_cache(metric_cache)

        # Publish aggregate metrics
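        # (per-hypervisor totals of allocated vCPUs, RAM and disk, accumulated
        # across all VMs above and reported with the base dimensions only)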
        for gauge in agg_gauges:
            self.gauge(agg_gauges[gauge], agg_values[gauge], dimensions=dims_base)