def set_error_values(self, requests, valid_from, valid_to, service=None, resource=None, ows_service=None):
    """
    Store error metrics for the requests that have exceptions attached.

    One ``response.error.count`` value is written for the whole batch and
    one ``response.error.types`` value for each distinct error type found.
    Nothing is written when no request in ``requests`` has an exception.

    :param requests: queryset of requests to inspect
    :param valid_from: start of the period the values are stored for
    :param valid_to: end of the period the values are stored for
    :param service: service passed on to ``MetricValue.add()``
    :param resource: resource passed on to ``MetricValue.add()``
    :param ows_service: OWS service passed on to ``MetricValue.add()``
    """
    failed = requests.filter(exceptions__isnull=False)
    if not failed.exists():
        return

    error_types = ExceptionEvent.objects.filter(request__in=failed)\
        .distinct()\
        .values_list('error_type', flat=True)

    # keyword arguments shared by every value stored below
    shared = {'valid_from': valid_from,
              'valid_to': valid_to,
              'resource': resource,
              'ows_service': ows_service,
              'service': service}

    # the overall error count is sampled against the whole batch
    batch_size = requests.count()
    total = failed.count()
    print(MetricValue.add(value=total,
                          value_num=total,
                          value_raw=total,
                          metric='response.error.count',
                          label='count',
                          samples_count=batch_size,
                          **shared))

    # per error type, the number of matching requests is both value and
    # samples count
    for error_type in error_types:
        occurrences = failed.filter(
            exceptions__error_type=error_type).count()
        print(MetricValue.add(value=occurrences,
                              value_num=occurrences,
                              value_raw=occurrences,
                              metric='response.error.types',
                              label=error_type,
                              samples_count=occurrences,
                              **shared))
def test_notifications_views(self):
    """
    Check that the user notification endpoints answer with status 200 and
    return the notification check created by this test.
    """
    now = datetime.utcnow().replace(tzinfo=pytz.utc)
    period_start = align_period_start(now, self.service.check_interval)
    period_end = period_start + self.service.check_interval

    # sanity check
    self.assertTrue(period_start < now < period_end)

    layer, _ = MonitoredResource.objects.get_or_create(type='layer',
                                                       name='test:test')
    # the two objects below are created but not used any further
    MonitoredResource.objects.get_or_create(type='layer',
                                            name='test:test2')
    MetricLabel.objects.get_or_create(name='discount')

    MetricValue.add(self.metric, period_start, period_end, self.service,
                    label="Count",
                    value_raw=10,
                    value_num=10,
                    value=10)

    nc = NotificationCheck.objects.create(name='check requests',
                                          description='check requests')
    MetricNotificationCheck.objects.create(notification_check=nc,
                                           service=self.service,
                                           metric=self.metric,
                                           min_value=10,
                                           max_value=200,
                                           resource=layer,
                                           max_timeout=None)

    client = self.client

    def fetch_json(url):
        # GET the url, require a 200 and hand back the decoded body
        response = client.get(url)
        self.assertEqual(response.status_code, 200)
        return json.loads(response.content)

    client.login(username=self.user, password=self.passwd)

    listing = fetch_json(reverse('monitoring:api_user_notifications'))
    self.assertTrue(listing['data'][0]['id'] == nc.id)

    config = fetch_json(reverse('monitoring:api_user_notification_config',
                                kwargs={'pk': nc.id}))
    self.assertTrue(config['data']['notification']['id'] == nc.id)

    listing = fetch_json(reverse('monitoring:api_user_notifications'))
    self.assertTrue(listing['data'][0]['id'] == nc.id)

    # the listing is fetched once more after logging in as the second user
    client.login(username=self.user2, password=self.passwd2)
    listing = fetch_json(reverse('monitoring:api_user_notifications'))
    self.assertTrue(len(listing['data']) == 1)
def test_notifications_views(self):
    """
    Check that the user notification endpoints answer with status 200 and
    return the notification check created by this test.
    """
    now = datetime.utcnow().replace(tzinfo=pytz.utc)
    period_start = align_period_start(now, self.service.check_interval)
    period_end = period_start + self.service.check_interval

    # sanity check
    self.assertTrue(period_start < now < period_end)

    layer, _ = MonitoredResource.objects.get_or_create(
        type='layer', name='test:test')
    # the two objects below are created but not used any further
    MonitoredResource.objects.get_or_create(
        type='layer', name='test:test2')
    MetricLabel.objects.get_or_create(name='discount')

    MetricValue.add(self.metric, period_start, period_end, self.service,
                    label="Count",
                    value_raw=10,
                    value_num=10,
                    value=10)

    nc = NotificationCheck.objects.create(
        name='check requests', description='check requests')
    MetricNotificationCheck.objects.create(notification_check=nc,
                                           service=self.service,
                                           metric=self.metric,
                                           min_value=10,
                                           max_value=200,
                                           resource=layer,
                                           max_timeout=None)

    client = self.client

    def fetch_json(url):
        # GET the url, require a 200 and hand back the decoded body
        response = client.get(url)
        self.assertEqual(response.status_code, 200)
        return json.loads(response.content)

    client.login(username=self.user, password=self.passwd)

    listing = fetch_json(reverse('monitoring:api_user_notifications'))
    self.assertTrue(listing['data'][0]['id'] == nc.id)

    config = fetch_json(reverse('monitoring:api_user_notification_config',
                                kwargs={'pk': nc.id}))
    self.assertTrue(config['data']['notification']['id'] == nc.id)

    listing = fetch_json(reverse('monitoring:api_user_notifications'))
    self.assertTrue(listing['data'][0]['id'] == nc.id)

    # the listing is fetched once more after logging in as the second user
    client.login(username=self.user2, password=self.passwd2)
    listing = fetch_json(reverse('monitoring:api_user_notifications'))
    self.assertTrue(len(listing['data']) == 1)
def get_network_rate(row, value, metric_defaults, metric_name, valid_to):
    """
    Store the ``<metric_name>.rate`` value for the interface named in ``row``.

    Prints a message and returns without storing anything when no
    interface name can be extracted from ``row`` or when no rate could be
    calculated.

    NOTE(review): ``self`` and ``get_iface_name`` are not defined in this
    block; presumably they come from an enclosing scope.
    """
    interface = get_iface_name(row)
    if not interface:
        print('no label', metric_name, row.get('description'))
        return

    computed = self._calculate_rate(metric_name, interface, value, valid_to)
    if computed is None:
        print('no rate for', metric_name)
        return

    payload = dict.fromkeys(('value', 'value_raw', 'value_num'), computed)
    payload['label'] = interface
    payload['metric'] = '{}.rate'.format(metric_name)
    # entries of metric_defaults override the keys set above
    payload.update(metric_defaults)
    print(MetricValue.add(**payload))
def get_network_rate(row, value, metric_defaults, metric_name, valid_to):
    """
    Store the ``<metric_name>.rate`` value for the interface named in ``row``.

    Prints a message and returns without storing anything when no
    interface name can be extracted from ``row`` or when no rate could be
    calculated.

    NOTE(review): ``self`` and ``get_iface_name`` are not defined in this
    block; presumably they come from an enclosing scope.
    """
    interface = get_iface_name(row)
    if not interface:
        print('no label', metric_name, row.get('description'))
        return

    computed = self._calculate_rate(
        metric_name, interface, value, valid_to)
    if computed is None:
        print('no rate for', metric_name)
        return

    payload = dict.fromkeys(('value', 'value_raw', 'value_num'), computed)
    payload['label'] = interface
    payload['metric'] = '{}.rate'.format(metric_name)
    # entries of metric_defaults override the keys set above
    payload.update(metric_defaults)
    print(MetricValue.add(**payload))
def set_metric_values(self, metric_name, column_name, requests, service, **metric_values):
    """
    Aggregate ``column_name`` over ``requests`` and store the result as
    values of the ``metric_name`` metric.

    How the column is aggregated depends on the flags of the metric
    returned by ``Metric.get_for()``:

    * ``is_rate`` - a single value: the average of the column, labelled
      ``'rate'``
    * ``is_count`` - one value per distinct column value: the sum of the
      column over the matching requests, labelled with that column value
    * ``is_value`` - one value per distinct column value: the number of
      matching requests, labelled with that column value
    * ``is_value_numeric`` - a single value: the maximum of the column,
      labelled with the metric type

    For the per-value variants only the first 100 rows, ordered by
    descending value, are stored.

    :param metric_name: name of the metric to store values for
    :param column_name: name of the request column to aggregate
    :param requests: queryset of requests to aggregate over
    :param service: service the values are stored for
    :param metric_values: extra keyword arguments passed on to
        ``MetricValue.add()``
    :raises ValueError: when the metric has none of the supported types
    """
    metric = Metric.get_for(metric_name, service=service)

    def _key(v):
        return v['value']

    def _per_distinct_value(aggregate):
        """Aggregate the column separately for each distinct column value."""
        rows = []
        distinct_values = requests.distinct(
            column_name).values_list(column_name, flat=True)
        for v in distinct_values:
            row = requests.filter(**{column_name: v})\
                .aggregate(value=aggregate(column_name),
                           samples=models.Count(column_name))
            row['label'] = v
            rows.append(row)
        # highest value first; sort-then-reverse is kept (instead of
        # reverse=True) so that rows with equal values keep their order
        rows.sort(key=_key)
        rows.reverse()
        return rows

    # we need list of three items:
    #  * value - numeric value for given metric
    #  * label - label value to be used
    #  * samples count - number of samples for a metric
    if metric.is_rate:
        row = requests.aggregate(value=models.Avg(column_name))
        row['samples'] = requests.count()
        row['label'] = 'rate'
        q = [row]
    elif metric.is_count:
        q = _per_distinct_value(models.Sum)
    elif metric.is_value:
        q = _per_distinct_value(models.Count)
    elif metric.is_value_numeric:
        row = requests.aggregate(value=models.Max(column_name),
                                 samples=models.Count(column_name))
        # This branch used to assign an undefined name ``v`` here and
        # failed with a NameError. There is one aggregated row and no
        # per-value label, so the metric type is used instead.
        # NOTE(review): confirm this is the label consumers expect.
        row['label'] = metric.type
        q = [row]
    else:
        raise ValueError("Unsupported metric type: {}".format(metric.type))

    rows = q[:100]
    metric_values.update({'metric': metric_name, 'service': service})
    for row in rows:
        label = row['label']
        value = row['value']
        samples = row['samples']
        metric_values.update({'value': value or 0,
                              'label': label,
                              'samples_count': samples,
                              'value_raw': value or 0,
                              # non-numeric values are stored without a
                              # numeric representation
                              'value_num': value if isinstance(value, (int, float, long, Decimal,)) else None})
        print(MetricValue.add(**metric_values))
def process_host_geonode(self, service, data, valid_from, valid_to):
    """
    Generates metric values for system-level measurements

    Reads the network, memory, storage, load, uptime and cpu readings
    from ``data['data']`` and stores each of them with
    ``MetricValue.add()``.

    The ``valid_from`` and ``valid_to`` arguments are not used: the period
    is recomputed from ``data['timestamp']`` and ``service.check_interval``.

    :param service: service the readings belong to
    :param data: dict with a ``'timestamp'`` entry and a ``'data'`` dict
        holding ``'network'``, ``'load'``, ``'memory'`` and ``'disks'``
        and, optionally, ``'uptime'`` and ``'cpu'``
    """
    import dateutil.parser

    # the timestamp is re-parsed from a string without fractional seconds
    # or offset, and then marked as UTC
    collected_at = parse_datetime(
        dateutil.parser.parse(data['timestamp'])
        .strftime("%Y-%m-%d %H:%M:%S")).replace(tzinfo=pytz.utc)
    valid_from = align_period_start(collected_at, service.check_interval)
    valid_to = align_period_end(collected_at, service.check_interval)

    mdefaults = {'valid_from': valid_from,
                 'valid_to': valid_to,
                 'resource': None,
                 'samples_count': 1,
                 'service': service}

    def metric_data(metric, label, value):
        """Build the MetricValue.add() keyword arguments for one reading."""
        mdata = {'value': value,
                 'value_raw': value,
                 'value_num': value,
                 'metric': metric,
                 'label': label}
        mdata.update(mdefaults)
        return mdata

    def drop_values(**criteria):
        """Delete values of this service and period matching ``criteria``."""
        MetricValue.objects.filter(valid_from=valid_from,
                                   valid_to=valid_to,
                                   service=service,
                                   **criteria)\
            .delete()

    # --- network ---
    drop_values(service_metric__metric__name__in=('network.in', 'network.out'))
    # .items() instead of the Python-2-only .iteritems(), matching the
    # inner loop
    for ifname, ifdata in data['data']['network'].items():
        for tx_label, tx_value in ifdata['traffic'].items():
            mdata = metric_data('network.{}'.format(tx_label), ifname, tx_value)
            # the rate is calculated before the new reading is stored
            rate = self._calculate_rate(
                mdata['metric'], ifname, tx_value, valid_to)
            print(MetricValue.add(**mdata))
            # a rate of zero is skipped here as well as a missing one
            if rate:
                print(MetricValue.add(**metric_data(
                    '{}.rate'.format(mdata['metric']), ifname, rate)))

    ldata = data['data']['load']
    memory_info = data['data']['memory']

    # --- memory: only keys that have a matching 'mem.*' metric ---
    mkeys = [m.name[len('mem.'):]
             for m in service.get_metrics() if m.name.startswith('mem.')]
    for mkey in mkeys:
        reading = memory_info.get(mkey)
        if not reading:
            continue
        mdata = metric_data('mem.{}'.format(mkey), 'B', reading)
        # values stored under the 'MB' label are removed; the new value
        # is stored under 'B'
        drop_values(service_metric__metric__name=mdata['metric'],
                    label__name='MB')
        print(MetricValue.add(**mdata))

    # --- storage ---
    drop_values(service_metric__metric__name__in=('storage.total',
                                                  'storage.used',
                                                  'storage.free',))
    for df in data['data']['disks']:
        total = df['total']
        used = df['used']
        free = df['free']
        mount = df['mountpoint']
        for metric, val in (('storage.total', total,),
                            ('storage.used', used,),
                            ('storage.free', free,),):
            print(MetricValue.add(**metric_data(metric, mount, val)))

    # --- load ---
    if ldata:
        # zip() stops at the shorter input, so extra load values can no
        # longer cause an IndexError on the label lookup
        for minutes, load in zip(('1', '5', '15'), ldata):
            mdata = metric_data('load.{}m'.format(minutes), 'Value', load)
            drop_values(service_metric__metric__name=mdata['metric'],
                        label__name='Value')
            print(MetricValue.add(**mdata))

    # --- uptime ---
    uptime = data['data'].get('uptime')
    if uptime is not None:
        mdata = metric_data('uptime', 'Seconds', uptime)
        drop_values(service_metric__metric__name=mdata['metric'],
                    label__name=mdata['label'])
        print(MetricValue.add(**mdata))

    # --- cpu ---
    if data['data'].get('cpu'):
        usage = data['data']['cpu']['usage']
        mdata = metric_data('cpu.usage', 'Seconds', usage)
        drop_values(service_metric__metric__name=mdata['metric'],
                    label__name=mdata['label'])
        print(MetricValue.add(**mdata))

        rate = self._calculate_rate(
            mdata['metric'], mdata['label'], mdata['value'], mdata['valid_to'])
        if rate:
            print(MetricValue.add(**metric_data(
                '{}.rate'.format(mdata['metric']), mdata['label'], rate)))

        percent = self._calculate_percent(
            mdata['metric'], mdata['label'], mdata['value'], mdata['valid_to'])
        if percent:
            print(MetricValue.add(**metric_data(
                '{}.percent'.format(mdata['metric']), 'Value', percent)))

        # NOTE(review): this stores the same cpu.usage reading a second
        # time; kept as in the original - confirm whether it is needed
        print(MetricValue.add(**mdata))
def process_host_geoserver(self, service, data, valid_from, valid_to):
    """
    Generates metric values for system-level measurements

    Every item of ``data`` whose ``'name'`` is listed in the metric map
    below is stored under the mapped monitoring metric name. The
    ``valid_from`` and ``valid_to`` arguments are not used: the period is
    recomputed from the current UTC time and ``service.check_interval``.
    """
    iface_pattern = re.compile(r'\[(\w+)\]')

    def iface_name(row):
        # name inside the first [...] of the description, if any
        found = iface_pattern.search(row['description'])
        if found is not None:
            return found.groups()[0]

    def store_network_rate(row, value, metric_defaults, metric_name, valid_to):
        interface = iface_name(row)
        if not interface:
            print('no label', metric_name, row.get('description'))
            return
        computed = self._calculate_rate(
            metric_name, interface, value, valid_to)
        if computed is None:
            print('no rate for', metric_name)
            return
        payload = dict.fromkeys(('value', 'value_raw', 'value_num'), computed)
        payload['label'] = interface
        payload['metric'] = '{}.rate'.format(metric_name)
        payload.update(metric_defaults)
        print(MetricValue.add(**payload))

    def mem_label(*args):
        return 'B'

    # gs metric -> monitoring metric name, label function, postproc
    # function
    metric_map = {
        'SYSTEM_UPTIME': ('uptime', None, None,),
        'SYSTEM_AVERAGE_LOAD': ('load.1m', None, None,),
        'CPU_LOAD': ('cpu.usage.percent', None, None,),
        'MEMORY_USED': ('mem.usage.percent', mem_label, None,),
        'MEMORY_TOTAL': ('mem.all', mem_label, None,),
        'MEMORY_FREE': ('mem.free', mem_label, None,),
        'NETWORK_INTERFACE_SEND': ('network.out', iface_name, store_network_rate),
        'NETWORK_INTERFACE_RECEIVED': ('network.in', iface_name, store_network_rate),
        'NETWORK_INTERFACES_SEND': ('network.out', None, store_network_rate),
        'NETWORK_INTERFACES_RECEIVED': ('network.in', None, store_network_rate),
    }

    collected_at = datetime.utcnow().replace(tzinfo=pytz.utc)
    valid_from = align_period_start(collected_at, service.check_interval)
    valid_to = align_period_end(collected_at, service.check_interval)

    mdefaults = {'valid_from': valid_from,
                 'valid_to': valid_to,
                 'resource': None,
                 'samples_count': 1,
                 'service': service}

    # clear everything already stored for the mapped metrics in this period
    mapped_names = [entry[0] for entry in metric_map.values()]
    MetricValue.objects.filter(service_metric__metric__name__in=mapped_names,
                               valid_from=valid_from,
                               valid_to=valid_to,
                               service=service)\
        .delete()

    for item in data:
        mapping = metric_map.get(item['name'])
        if not mapping:
            continue
        metric_name, label_function, processing_function = mapping
        if metric_name is None:
            continue

        value = item['value']
        if isinstance(value, (str, unicode,)):
            # string values may use a comma as the decimal separator
            value = value.replace(',', '.')

        if callable(label_function):
            label = label_function(item)
        else:
            label = None

        mdata = {'value': value,
                 'value_raw': value,
                 'value_num': value,
                 'label': label,
                 'metric': metric_name}
        mdata.update(mdefaults)
        print(MetricValue.add(**mdata))

        if callable(processing_function):
            processing_function(
                item, value, mdefaults, metric_name, valid_to)
def process_requests_batch(self, service, requests, valid_from, valid_to):
    """
    Processes requests information into metric values

    Values already stored for ``service`` within the period are deleted
    first. Statistics are then stored for the whole batch, for each
    extracted resource, and, per resource, for all OWS requests together
    and for each OWS service separately.
    """
    if not requests.count():
        return
    log.info("Processing batch of %s requests from %s to %s",
             requests.count(), valid_from, valid_to)

    # built before the service filter below is applied, so the first
    # round of overall stats sees the queryset as it was passed in
    metric_defaults = {'valid_from': valid_from,
                       'valid_to': valid_to,
                       'requests': requests,
                       'service': service}

    # (metric name, request column) pairs stored for every slice of requests
    breakdowns = (('request.ip', 'client_ip'),
                  ('request.country', 'client_country'),
                  ('request.city', 'client_city'),
                  ('request.region', 'client_region'),
                  ('request.ua', 'user_agent'),
                  ('request.ua.family', 'user_agent_family'),
                  ('response.time', 'response_time'),
                  ('response.size', 'response_size'),
                  ('response.status', 'response_status'),
                  ('request.method', 'request_method'))

    def store_breakdowns():
        # reads metric_defaults as it is at call time
        for metric_name, column_name in breakdowns:
            self.set_metric_values(metric_name, column_name, **metric_defaults)

    def store_request_count(slice_size, **extra):
        return MetricValue.add('request.count', valid_from, valid_to, service, 'Count',
                               value=slice_size,
                               value_num=slice_size,
                               value_raw=slice_size,
                               samples_count=slice_size,
                               **extra)

    def store_path_counts(slice_requests, resource):
        request_paths = slice_requests.distinct('request_path').values_list(
            'request_path', flat=True)
        for request_path in request_paths:
            hits = slice_requests.filter(request_path=request_path).count()
            print(MetricValue.add('request.path', valid_from, valid_to, service, request_path,
                                  value=hits,
                                  value_num=hits,
                                  value_raw=hits,
                                  samples_count=hits,
                                  resource=resource))

    MetricValue.objects.filter(
        valid_from__gte=valid_from, valid_to__lte=valid_to, service=service).delete()

    requests = requests.filter(service=service)
    resources = self.extract_resources(requests)

    print(store_request_count(requests.count(), resource=None))
    store_path_counts(requests, None)

    # calculate overall stats
    store_breakdowns()
    self.set_error_values(
        requests, valid_from, valid_to, service=service, resource=None)

    ows_all = OWSService.objects.get(name=OWSService.OWS_ALL)

    # for each resource we should calculate another set of stats
    for resource, _requests in [(None, requests,)] + resources:
        count = _requests.count()
        ows_services = self.extract_ows_services(_requests)

        metric_defaults['resource'] = resource
        metric_defaults['requests'] = _requests
        metric_defaults['ows_service'] = None

        # the result of this particular call is not printed
        store_request_count(count, resource=resource)
        store_breakdowns()
        self.set_error_values(
            _requests, valid_from, valid_to, service=service, resource=resource)

        # ows_services may be subset of all requests in a batch, so we do
        # calculation separately
        if ows_services:
            ows_requests = _requests.filter(ows_service__isnull=False)
            count = ows_requests.count()
            metric_defaults['requests'] = ows_requests
            metric_defaults['ows_service'] = ows_all

            print(store_request_count(count, resource=resource, ows_service=ows_all))
            store_breakdowns()

            for ows_service in ows_services:
                ows_requests = _requests.filter(ows_service=ows_service)
                # NOTE(review): path counts are stored without
                # ows_service - confirm that this is intended
                store_path_counts(ows_requests, resource)

                count = ows_requests.count()
                metric_defaults['ows_service'] = ows_service
                metric_defaults['requests'] = ows_requests

                print(store_request_count(count, resource=resource, ows_service=ows_service))
                store_breakdowns()
                self.set_error_values(ows_requests,
                                      valid_from,
                                      valid_to,
                                      service=service,
                                      resource=resource,
                                      ows_service=ows_service)
def test_monitoring_checks(self):
    """
    Exercise ``MetricNotificationCheck.check_metric()`` against stored
    metric values while the thresholds, OWS service and resource of the
    check are changed.
    """
    now = datetime.utcnow().replace(tzinfo=pytz.utc)
    period_start = align_period_start(now, self.service.check_interval)
    period_end = period_start + self.service.check_interval

    # sanity check
    self.assertTrue(period_start < now < period_end)

    wfs = OWSService.objects.get(name='WFS')
    layer, _ = MonitoredResource.objects.get_or_create(
        type='layer', name='test:test')
    other_layer, _ = MonitoredResource.objects.get_or_create(
        type='layer', name='test:test2')
    MetricLabel.objects.get_or_create(name='discount')

    def add_value(label, **extra):
        # every value stored by this test is 10 for the current period
        MetricValue.add(self.metric, period_start, period_end, self.service,
                        label=label,
                        value_raw=10,
                        value_num=10,
                        value=10,
                        **extra)

    add_value("Count")

    no_flags = (False, False, False, False)
    uthreshold = [
        (self.metric.name, 'min_value') + no_flags +
        (0, 100, None, "Min number of request"),
        (self.metric.name, 'max_value') + no_flags +
        (1000, None, None, "Max number of request"),
    ]
    nc = NotificationCheck.create(name='check requests name',
                                  description='check requests description',
                                  severity='warning',
                                  user_threshold=uthreshold)

    mc = MetricNotificationCheck.objects.create(
        notification_check=nc,
        service=self.service,
        metric=self.metric,
        min_value=None,
        definition=nc.definitions.first(),
        max_value=None,
        max_timeout=None)

    def reconfigure(**settings):
        for attribute, new_value in settings.items():
            setattr(mc, attribute, new_value)
        mc.save()

    def assert_check_raises(error):
        with self.assertRaises(error):
            mc.check_metric(for_timestamp=now)

    # no thresholds set at all
    assert_check_raises(ValueError)

    # stored value 10 is below the minimum of 11
    reconfigure(min_value=11)
    assert_check_raises(mc.MetricValueError)

    reconfigure(min_value=1, max_value=11)
    self.assertTrue(mc.check_metric(for_timestamp=now))

    add_value("discount", ows_service=wfs)
    reconfigure(min_value=11, max_value=None, ows_service=wfs)
    assert_check_raises(mc.MetricValueError)

    add_value("discount", resource=layer)
    reconfigure(min_value=1, max_value=10, ows_service=None, resource=layer)
    self.assertTrue(mc.check_metric(for_timestamp=now))

    MetricValue.objects.all().delete()
    add_value("discount", resource=other_layer)

    # this should raise ValueError, because MetricValue won't match
    assert_check_raises(ValueError)
def process_host_geonode(self, service, data, valid_from, valid_to):
    """
    Generates metric values for system-level measurements

    Reads the network, memory, storage, load, uptime and cpu readings
    from ``data['data']`` and stores each of them with
    ``MetricValue.add()``.

    The ``valid_from`` and ``valid_to`` arguments are not used: the period
    is recomputed from ``data['timestamp']`` and ``service.check_interval``.

    :param service: service the readings belong to
    :param data: dict with a ``'timestamp'`` entry and a ``'data'`` dict
        holding ``'network'``, ``'load'``, ``'memory'`` and ``'disks'``
        and, optionally, ``'uptime'`` and ``'cpu'``
    """
    import dateutil.parser

    # the timestamp is re-parsed from a string without fractional seconds
    # or offset, and then marked as UTC
    collected_at = parse_datetime(
        dateutil.parser.parse(data['timestamp'])
        .strftime("%Y-%m-%d %H:%M:%S")).replace(tzinfo=pytz.utc)
    valid_from = align_period_start(collected_at, service.check_interval)
    valid_to = align_period_end(collected_at, service.check_interval)

    mdefaults = {'valid_from': valid_from,
                 'valid_to': valid_to,
                 'resource': None,
                 'samples_count': 1,
                 'service': service}

    def metric_data(metric, label, value):
        """Build the MetricValue.add() keyword arguments for one reading."""
        mdata = {'value': value,
                 'value_raw': value,
                 'value_num': value,
                 'metric': metric,
                 'label': label}
        mdata.update(mdefaults)
        return mdata

    def drop_values(**criteria):
        """Delete values of this service and period matching ``criteria``."""
        MetricValue.objects.filter(valid_from=valid_from,
                                   valid_to=valid_to,
                                   service=service,
                                   **criteria)\
            .delete()

    # --- network ---
    drop_values(service_metric__metric__name__in=('network.in', 'network.out'))
    # .items() instead of the Python-2-only .iteritems(), matching the
    # inner loop
    for ifname, ifdata in data['data']['network'].items():
        for tx_label, tx_value in ifdata['traffic'].items():
            mdata = metric_data('network.{}'.format(tx_label), ifname, tx_value)
            # the rate is calculated before the new reading is stored
            rate = self._calculate_rate(
                mdata['metric'], ifname, tx_value, valid_to)
            print(MetricValue.add(**mdata))
            # a rate of zero is skipped here as well as a missing one
            if rate:
                print(MetricValue.add(**metric_data(
                    '{}.rate'.format(mdata['metric']), ifname, rate)))

    ldata = data['data']['load']
    memory_info = data['data']['memory']

    # --- memory: only keys that have a matching 'mem.*' metric ---
    mkeys = [m.name[len('mem.'):]
             for m in service.get_metrics() if m.name.startswith('mem.')]
    for mkey in mkeys:
        reading = memory_info.get(mkey)
        if not reading:
            continue
        mdata = metric_data('mem.{}'.format(mkey), 'B', reading)
        # values stored under the 'MB' label are removed; the new value
        # is stored under 'B'
        drop_values(service_metric__metric__name=mdata['metric'],
                    label__name='MB')
        print(MetricValue.add(**mdata))

    # --- storage ---
    drop_values(service_metric__metric__name__in=('storage.total',
                                                  'storage.used',
                                                  'storage.free',))
    for df in data['data']['disks']:
        total = df['total']
        used = df['used']
        free = df['free']
        mount = df['mountpoint']
        for metric, val in (('storage.total', total,),
                            ('storage.used', used,),
                            ('storage.free', free,),):
            print(MetricValue.add(**metric_data(metric, mount, val)))

    # --- load ---
    if ldata:
        # zip() stops at the shorter input, so extra load values can no
        # longer cause an IndexError on the label lookup
        for minutes, load in zip(('1', '5', '15'), ldata):
            mdata = metric_data('load.{}m'.format(minutes), 'Value', load)
            drop_values(service_metric__metric__name=mdata['metric'],
                        label__name='Value')
            print(MetricValue.add(**mdata))

    # --- uptime ---
    uptime = data['data'].get('uptime')
    if uptime is not None:
        mdata = metric_data('uptime', 'Seconds', uptime)
        drop_values(service_metric__metric__name=mdata['metric'],
                    label__name=mdata['label'])
        print(MetricValue.add(**mdata))

    # --- cpu ---
    if data['data'].get('cpu'):
        usage = data['data']['cpu']['usage']
        mdata = metric_data('cpu.usage', 'Seconds', usage)
        drop_values(service_metric__metric__name=mdata['metric'],
                    label__name=mdata['label'])
        print(MetricValue.add(**mdata))

        rate = self._calculate_rate(
            mdata['metric'], mdata['label'], mdata['value'], mdata['valid_to'])
        if rate:
            print(MetricValue.add(**metric_data(
                '{}.rate'.format(mdata['metric']), mdata['label'], rate)))

        percent = self._calculate_percent(
            mdata['metric'], mdata['label'], mdata['value'], mdata['valid_to'])
        if percent:
            print(MetricValue.add(**metric_data(
                '{}.percent'.format(mdata['metric']), 'Value', percent)))

        # NOTE(review): this stores the same cpu.usage reading a second
        # time; kept as in the original - confirm whether it is needed
        print(MetricValue.add(**mdata))
def test_notifications_api(self):
    """
    Configure a notification check through the user notification API and
    check when ``CollectorAPI.emit_notifications()`` sends mail.
    """
    capi = CollectorAPI()
    start = datetime.utcnow().replace(tzinfo=pytz.utc)
    start_aligned = align_period_start(start, self.service.check_interval)
    end_aligned = start_aligned + self.service.check_interval

    # for (metric_name, field_opt, use_service,
    #     use_resource, use_label, use_ows_service,
    #     minimum, maximum, thresholds,) in thresholds:
    request_count_threshold = (
        'request.count', 'min_value', False, False, False, False,
        0, 10, None, 'Number of handled requests is lower than',)
    response_time_threshold = (
        'response.time', 'max_value', False, False, False, False,
        500, None, None, 'Response time is higher than',)
    nc = NotificationCheck.create(
        'geonode is not working',
        'detects when requests are not handled',
        (request_count_threshold, response_time_threshold,))
    self.assertTrue(nc.definitions.all().count() == 2)

    self.client.login(username=self.u2.username, password=self.passwd2)

    def form_payload(form, emails, values):
        # assign the values, in order, to the form fields that are not
        # reserved
        payload = {'emails': emails}
        position = 0
        for fname in form.fields.keys():
            if fname in self.reserved_fields:
                continue
            payload[fname] = values[position]
            position += 1
        return payload

    for nc in NotificationCheck.objects.all():
        notifications_config_url = reverse(
            'monitoring:api_user_notification_config', args=(nc.id, ))
        nc_form = nc.get_user_form()
        self.assertTrue(nc_form)
        self.assertTrue(nc_form.fields.keys())

        # this submission is expected to be rejected
        rejected = self.client.post(
            notifications_config_url,
            form_payload(nc_form, '\n'.join([]), [1000000, 100000]))
        self.assertEqual(rejected.status_code, 400)

        recipients = '\n'.join(
            [self.u.email, self.u2.email, '*****@*****.**'])
        data = form_payload(nc_form, recipients, [7, 600])
        resp = self.client.post(notifications_config_url, data)

        nc.refresh_from_db()
        self.assertEqual(resp.status_code, 200, resp)

        # the last submitted address is compared with get_emails(), the
        # others with the emails of get_users()
        submitted = data['emails'].split('\n')
        _emails = submitted[-1:]
        _users = submitted[:-1]
        self.assertEqual(set(u.email for u in nc.get_users()), set(_users))
        self.assertEqual(set(nc.get_emails()), set(_emails))

    metric_rq_count = Metric.objects.get(name='request.count')
    metric_rq_time = Metric.objects.get(name='response.time')

    # request count 0 and response time 700 for the current period
    for metric, reading in ((metric_rq_count, 0), (metric_rq_time, 700)):
        MetricValue.add(metric, start_aligned, end_aligned, self.service,
                        label="Count",
                        value_raw=reading,
                        value_num=reading,
                        value=reading)

    nc = NotificationCheck.objects.get()
    self.assertTrue(len(nc.get_emails()) > 0)
    self.assertTrue(len(nc.get_users()) > 0)
    self.assertEqual(nc.last_send, None)
    self.assertTrue(nc.can_send)
    self.assertEqual(len(mail.outbox), 0)

    # make sure inactive will not trigger anything
    nc.active = False
    nc.save()
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), 0)

    nc.active = True
    nc.save()
    capi.emit_notifications(start)
    self.assertTrue(nc.receivers.all().count() > 0)
    self.assertEqual(len(mail.outbox), nc.receivers.all().count())

    nc.refresh_from_db()
    nresp = self.client.get(reverse('monitoring:api_user_notifications'))
    self.assertEqual(nresp.status_code, 200)
    ndata = json.loads(nresp.content)
    self.assertEqual(
        set(n['id'] for n in ndata['data']),
        set(NotificationCheck.objects.all().values_list('id', flat=True)))

    self.assertTrue(isinstance(nc.last_send, datetime))
    self.assertFalse(nc.can_send)

    # nothing is sent while the check cannot send
    mail.outbox = []
    self.assertEqual(len(mail.outbox), 0)
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), 0)

    # move last_send back by the grace period
    nc.last_send = start - nc.grace_period
    nc.save()
    self.assertTrue(nc.can_send)

    mail.outbox = []
    self.assertEqual(len(mail.outbox), 0)
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), nc.receivers.all().count())
def test_monitoring_checks(self):
    """Verify ``MetricNotificationCheck.check_metric`` against stored values.

    A single check object is reconfigured step by step (limits, OWS
    service, resource) and after every step the test states whether
    ``check_metric`` is expected to return a truthy value, raise
    ``MetricValueError`` or raise ``ValueError``.
    """
    now = datetime.utcnow().replace(tzinfo=pytz.utc)
    period_start = align_period_start(now, self.service.check_interval)
    period_end = period_start + self.service.check_interval

    # sanity check
    self.assertTrue(period_start < now and now < period_end)

    wfs = OWSService.objects.get(name='WFS')
    layer_a, _ = MonitoredResource.objects.get_or_create(
        type='layer', name='test:test')
    layer_b, _ = MonitoredResource.objects.get_or_create(
        type='layer', name='test:test2')
    # Only the get-or-create call matters here; its result is not used.
    MetricLabel.objects.get_or_create(name='discount')

    def record(label_name, **scope):
        """Store a value of 10 for self.metric in the current period."""
        return MetricValue.add(self.metric, period_start, period_end,
                               self.service, label=label_name,
                               value_raw=10, value_num=10, value=10,
                               **scope)

    def run_check():
        """Evaluate the check for the timestamp taken at test start."""
        return check.check_metric(for_timestamp=now)

    record("Count")

    notification = NotificationCheck.create(
        name='check requests name',
        description='check requests description',
        severity='warning',
        user_threshold=[
            (self.metric.name, 'min_value', False, False, False, False,
             0, 100, None, "Min number of request"),
            (self.metric.name, 'max_value', False, False, False, False,
             1000, None, None, "Max number of request"),
        ])

    check = MetricNotificationCheck.objects.create(
        notification_check=notification,
        service=self.service,
        metric=self.metric,
        min_value=None,
        definition=notification.definitions.first(),
        max_value=None,
        max_timeout=None)

    # Neither limit is set yet: ValueError is expected.
    with self.assertRaises(ValueError):
        run_check()

    # Lower limit above the stored value of 10.
    check.min_value = 11
    check.save()
    with self.assertRaises(check.MetricValueError):
        run_check()

    # Stored value of 10 lies between the limits.
    check.min_value = 1
    check.max_value = 11
    check.save()
    self.assertTrue(run_check())

    # Same scenario, restricted to one OWS service.
    record("discount", ows_service=wfs)
    check.min_value = 11
    check.max_value = None
    check.ows_service = wfs
    check.save()
    with self.assertRaises(check.MetricValueError):
        run_check()

    # Same scenario, restricted to one resource instead.
    record("discount", resource=layer_a)
    check.min_value = 1
    check.max_value = 10
    check.ows_service = None
    check.resource = layer_a
    check.save()
    self.assertTrue(run_check())

    MetricValue.objects.all().delete()
    record("discount", resource=layer_b)
    # this should raise ValueError, because MetricValue won't match
    with self.assertRaises(ValueError):
        run_check()
def test_notifications_api(self):
    """Exercise the user notification config API and notification e-mails.

    The test:

    1. creates a ``NotificationCheck`` with two threshold definitions,
    2. posts one payload that the config endpoint is expected to reject
       (HTTP 400) and one that it is expected to accept (HTTP 200), then
       verifies the stored users / e-mail addresses,
    3. stores metric values and verifies that ``emit_notifications`` only
       fills ``mail.outbox`` while the check is active and ``can_send``.

    NOTE(review): this file contains another definition named
    ``test_notifications_api``; if both live in the same TestCase the
    later definition replaces the earlier one and only one of them runs.
    """
    capi = CollectorAPI()
    start = datetime.utcnow().replace(tzinfo=pytz.utc)
    start_aligned = align_period_start(start, self.service.check_interval)
    end_aligned = start_aligned + self.service.check_interval

    # Each definition tuple is laid out as:
    # (metric_name, field_opt, use_service,
    #  use_resource, use_label, use_ows_service,
    #  minimum, maximum, thresholds, <trailing text>)
    # NOTE(review): the trailing string looks like a description - confirm
    # against NotificationCheck.create.
    notifications_config = (
        'geonode is not working',
        'detects when requests are not handled',
        (
            ('request.count', 'min_value', False, False, False, False,
             0, 10, None, 'Number of handled requests is lower than',),
            ('response.time', 'max_value', False, False, False, False,
             500, None, None, 'Response time is higher than',),
        ),
    )
    nc = NotificationCheck.create(*notifications_config)
    self.assertEqual(nc.definitions.all().count(), 2)

    user = self.u2
    pwd = self.passwd2
    self.client.login(username=user.username, password=pwd)

    def build_payload(form, emails, values):
        """Return POST data: joined e-mails plus one value per form field.

        Fields listed in ``self.reserved_fields`` are skipped; remaining
        fields receive ``values`` in form order.
        """
        payload = {'emails': '\n'.join(emails)}
        field_names = [name for name in form.fields
                       if name not in self.reserved_fields]
        # Fail with a readable message instead of an IndexError when the
        # form exposes more configurable fields than we have values for.
        self.assertLessEqual(len(field_names), len(values))
        payload.update(zip(field_names, values))
        return payload

    for nc in NotificationCheck.objects.all():
        notifications_config_url = reverse(
            'monitoring:api_user_notification_config', args=(nc.id,))
        nc_form = nc.get_user_form()
        self.assertTrue(nc_form)
        self.assertTrue(nc_form.fields.keys())

        # No recipients and very large values: the endpoint must refuse
        # this (presumably because the values fall outside the configured
        # ranges - confirm against the form validation).
        data = build_payload(nc_form, [], [1000000, 100000])
        resp = self.client.post(notifications_config_url, data)
        self.assertEqual(resp.status_code, 400)

        # Two addresses of existing users followed by one extra address.
        recipients = [self.u.email, self.u2.email, '*****@*****.**']
        data = build_payload(nc_form, recipients, [7, 600])
        resp = self.client.post(notifications_config_url, data)
        nc.refresh_from_db()
        self.assertEqual(resp.status_code, 200, resp)

        # All but the last address are expected back from get_users(),
        # the last one from get_emails().
        _users = recipients[:-1]
        _emails = recipients[-1:]
        self.assertEqual({u.email for u in nc.get_users()}, set(_users))
        self.assertEqual(set(nc.get_emails()), set(_emails))

    metric_rq_count = Metric.objects.get(name='request.count')
    metric_rq_time = Metric.objects.get(name='response.time')

    MetricValue.add(metric_rq_count, start_aligned, end_aligned,
                    self.service, label="Count",
                    value_raw=0, value_num=0, value=0)
    MetricValue.add(metric_rq_time, start_aligned, end_aligned,
                    self.service, label="Count",
                    value_raw=700, value_num=700, value=700)

    # .get() without arguments also asserts that exactly one check exists.
    nc = NotificationCheck.objects.get()
    self.assertGreater(len(nc.get_emails()), 0)
    self.assertGreater(len(nc.get_users()), 0)
    self.assertIsNone(nc.last_send)
    self.assertTrue(nc.can_send)
    self.assertEqual(len(mail.outbox), 0)

    # make sure inactive will not trigger anything
    nc.active = False
    nc.save()
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), 0)

    nc.active = True
    nc.save()
    capi.emit_notifications(start)
    self.assertGreater(nc.receivers.all().count(), 0)
    self.assertEqual(len(mail.outbox), nc.receivers.all().count())

    nc.refresh_from_db()
    notifications_url = reverse('monitoring:api_user_notifications')
    nresp = self.client.get(notifications_url)
    self.assertEqual(nresp.status_code, 200)
    ndata = json.loads(nresp.content)
    self.assertEqual(
        {n['id'] for n in ndata['data']},
        set(NotificationCheck.objects.all().values_list('id', flat=True)))
    self.assertIsInstance(nc.last_send, datetime)
    self.assertFalse(nc.can_send)

    # Right after a send, a second emit must not produce any mail.
    mail.outbox = []
    self.assertEqual(len(mail.outbox), 0)
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), 0)

    # Move last_send back by one grace period so sending is allowed again.
    nc.last_send = start - nc.grace_period
    nc.save()
    self.assertTrue(nc.can_send)
    mail.outbox = []
    self.assertEqual(len(mail.outbox), 0)
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), nc.receivers.all().count())