def _cloud_sync(self, provider_id=None):
    """Shared test helper: sync valid credentials and assert `last_sync`
    advances, then sync invalid credentials and assert an error is stored."""
    valid_dict = self.valid_data[provider_id]
    invalid_dict = self.invalid_data[provider_id]

    credentials_id = cloud_credentials_model.save(data=valid_dict, provider_id=provider_id)
    valid_credentials = cloud_credentials_model.get_by_id(credentials_id)

    now = unix_utc_now() - 5  # Just in case, the sync could pass in less than a second
    sync_credentials(credentials=valid_credentials)

    entry = cloud_credentials_model.get_by_id(credentials_id)
    assert entry['last_sync'] > now
    # for r in server_model.get_all():
    #     eq_(r['provider'], 'rackspace')
    #     eq_(r['credentials'], entry['_id'])

    # Invalid credentials
    cloud_credentials_model.update(data=invalid_dict, id=credentials_id, provider_id=provider_id)
    invalid_credentials = cloud_credentials_model.get_by_id(credentials_id)

    now = unix_utc_now() - 5
    sync_credentials(credentials=invalid_credentials)

    entry = cloud_credentials_model.get_by_id(credentials_id)
    assert entry['error']
    assert entry['last_sync'] > now
def test_notsendingdata_alert(self):
    """NotSendingData alerts must trigger both for a specific server
    ("system" rule) and for the global "all servers" rule."""
    # Per-server rule
    self._cleanup()
    now = unix_utc_now()
    uptime_alert = {
        "rule_type": "system",
        "server": self.server_id,
        "account_id": self.account_id,
        "period": 0,
    }
    cpu_alert_dict = {**uptime_alert, 'metric': 'NotSendingData'}
    alert_id = alerts_model.collection.insert(cpu_alert_dict)

    # Server last checked 15s ago -> a period-0 alert must fire
    server_model.update({'last_check': now - 15}, self.server_id)
    notsendingdata_alerter.check()

    unsent_alerts = alerts_history_model.get_unsent()
    for trigger in unsent_alerts['data']:
        assert trigger['alert_id'] == alert_id
    eq_(unsent_alerts['data'].count(), 1)

    # Global rule ("all" servers)
    self._cleanup()
    now = unix_utc_now()
    uptime_alert = {
        "rule_type": "global",
        "server": "all",
        "account_id": self.account_id,
        "period": 0,
    }
    cpu_alert_dict = {**uptime_alert, 'metric': 'NotSendingData'}
    alert_id = alerts_model.collection.insert(cpu_alert_dict)

    server_model.update({'last_check': now - 15}, self.server_id)
    notsendingdata_alerter.check()

    unsent_alerts = alerts_history_model.get_unsent()
    for trigger in unsent_alerts['data']:
        assert trigger['alert_id'] == alert_id
    eq_(unsent_alerts['data'].count(), 1)
def test_mute(self):
    """Muting all servers stores an expiry timestamp; period=0 means an
    indefinite mute (no expiry field stored)."""
    self._cleanup()
    url = reverse('alerts_mute_servers')

    # period=1 (hour) -> expires one hour from now.
    # NOTE(review): the exact equality against unix_utc_now() could flake
    # if the request takes longer than a second - confirm acceptable.
    response = self.c.post(url, {
        'server': 'all',
        'period': 1,
    })
    result = alert_mute_servers_model.collection.find_one()
    assert result['expires_at_utc'] == unix_utc_now() + 3600

    self._cleanup()

    # period=0 -> indefinite mute, no expiry stored
    response = self.c.post(url, {
        'server': 'all',
        'period': 0,
    })
    result = alert_mute_servers_model.collection.find_one()
    assert result.get('expires_at_utc') == None
def add(self, name, account_id=None, check_every=60, keep_data=30, tags=None, key=None):
    """Create a server record and return its access key.

    :param name: display name of the server
    :param account_id: owning account id
    :param check_every: agent check interval, in seconds
    :param keep_data: data retention, in days
    :param tags: optional list of tag ids
    :param key: explicit server key; a random 32-char key is generated if omitted
    :returns: the server key string
    """
    # `tags=None` avoids the shared mutable-default pitfall of `tags=[]`
    tags = [] if tags is None else tags
    server_key = key if key else generate_random_string(size=32)

    data = {
        "name": name,
        "key": server_key,
        "account_id": account_id,
        "check_every": check_every,
        "keep_data": keep_data,
        "date_created": unix_utc_now(),
        "tags": tags
    }
    self.collection.insert(data)

    for field in ('tags', 'key', 'name', 'last_check', 'account_id', 'instance_id'):
        self.collection.ensure_index([(field, self.desc)], background=True)

    return server_key
def empty(self):
    """Build a placeholder entry for every active check, all stamped
    with the same current time."""
    timestamp = unix_utc_now()
    return {check: {'time': timestamp, 'last': 1} for check in self.active_checks}
def __call__(self, *args, **kwargs):
    """Store an incoming log entry, build a flat searchable text field
    from the message, and flag the 'logs' tab as unread.

    args[0] is the log payload dict: {'message': ..., 'tags': [...]}.
    """
    log_dict = args[0]

    try:
        tags = log_dict.get('tags')
    except AttributeError:  # payload without a dict interface - no tags
        tags = None

    message = log_dict.get('message', '')
    now = unix_utc_now()
    self.check_tags(tags)

    entry = {'time': now, 'message': message, 'tags': tags}

    # Add the data to a separate field, for easy searching
    if isinstance(message, dict):
        self.str = ""
        self.flatten_dict(message)
        _searchable = self.str
    elif isinstance(message, list):
        _searchable = ":".join(["%s" % el for el in message])
    else:
        _searchable = message

    entry['_searchable'] = _searchable
    self.model.save_log(entry)
    self.common_model.upsert_unread('logs')
def throttle_check_period_test(self):
    """The throttle should accept data when no check has ever been
    recorded or the check interval has elapsed, and reject early data."""
    self._cleanup()
    buffer_period = 15  # For collectd

    # No data - first check
    server_key = server_model.add('test_name', account_id=1, check_every=60)
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == True

    self._cleanup()

    # Last check older than the 60s interval -> allowed
    now = unix_utc_now()
    server_key = server_model.add('test_name', account_id=1, check_every=60)
    server = server_model.get_server_by_key(server_key)
    data = {'last_check': now-61}
    server_model.update(data, server['_id'])
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == True

    # Too recent even with the 15s buffer -> rejected
    last_check_plus_buffer = now-54+buffer_period
    data = {'last_check': last_check_plus_buffer}
    server_model.update(data, server['_id'])
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == False

    self._cleanup()

    # 300s interval elapsed -> allowed
    server_key = server_model.add('test_name', account_id=1, check_every=300)
    server = server_model.get_server_by_key(server_key)
    data = {'last_check': now-301}
    server_model.update(data, server['_id'])
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == True

    self._cleanup()
def save_data(self, server=None, data=None, time=None, expires_at=None):
    """Store one system snapshot.

    Disk and network sections go to their own models, the remainder is
    inserted into the generic data collection, and the server's
    `last_check`/`uptime` metadata is refreshed.
    """
    server_id = server['_id']
    time = time if time else unix_utc_now()

    volumes_model.save_data(server=server, data=data.get('disk'), time=time, expires_at=expires_at)
    interfaces_model.save_data(server=server, data=data.get('network'), time=time, expires_at=expires_at)

    server_meta = {
        'last_check': time,
        'uptime': data.get('uptime', ""),
    }
    server_model.update(server_meta, server_id)

    # Everything except disk/network goes into the data collection
    # (dict comprehension instead of dict([...]) - same result, idiomatic)
    cleaned_data_dict = {k: v for k, v in data.items() if k not in ('disk', 'network')}
    cleaned_data_dict['time'] = time
    cleaned_data_dict['server_id'] = server['_id']
    cleaned_data_dict["expires_at"] = expires_at

    self.data_collection.insert(cleaned_data_dict)
    self.data_collection.ensure_index([('time', self.desc)], background=True)
    self.data_collection.ensure_index([('server_id', self.desc)], background=True)
    self.data_collection.ensure_index([('expires_at', 1)], expireAfterSeconds=0)
def get_or_create_by_machine_id(self, machine_id=None, hostname=None, check_every=60, keep_data=30, instance_id=None, tags=None):
    """Find a server by its machine id, creating it if necessary.

    Bare-metal servers are keyed directly by `machine_id`; cloud servers
    are looked up by `instance_id` and re-keyed to the current machine id.

    :param machine_id: randomly generated agent key
    :param hostname: display name; a haiku name is generated if missing
    :param check_every: agent check interval, in seconds
    :param keep_data: data retention, in days
    :param instance_id: cloud instance id; empty for bare metal
    :param tags: optional list of tag ids
    :returns: the server document
    """
    tags = [] if tags is None else tags  # avoid shared mutable default
    server = self.collection.find_one({"key": machine_id})  # Randomly generated

    instance_id = "" if instance_id is None else instance_id
    name = hostname if hostname else generate_haiku_name()

    # keep_data - in days / check_every - in seconds
    data = {
        "name": name,
        "key": machine_id,
        "check_every": check_every,
        "keep_data": keep_data,
        "date_created": unix_utc_now(),
        "tags": tags
    }

    # Bare metal servers
    if server is None and len(instance_id) == 0:
        self.collection.insert(data)
        server = self.collection.find_one({"key": machine_id})

    # Cloud servers
    if len(instance_id) > 0:
        server = self.collection.find_one({"instance_id": instance_id})
        # Cloud server synced and found - refresh its key
        if server is not None:
            self.collection.update({"instance_id": instance_id}, {"$set": {"key": machine_id}}, upsert=True)
        else:
            data["key"] = machine_id
            data["instance_id"] = instance_id
            self.collection.insert(data)
        server = self.collection.find_one({"key": machine_id})

    for field in ('name', 'tags', 'key', 'last_check', 'account_id', 'instance_id'):
        self.collection.ensure_index([(field, self.desc)], background=True)

    return server
def throttle_check_period_test(self):
    """The throttle should accept data when no check has ever been
    recorded or the check interval has elapsed, and reject early data."""
    self._cleanup()
    buffer_period = 15  # For collectd

    # No data - first check
    server_key = server_model.add('test_name', account_id=1, check_every=60)
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == True

    self._cleanup()

    # Last check older than the 60s interval -> allowed
    now = unix_utc_now()
    server_key = server_model.add('test_name', account_id=1, check_every=60)
    server = server_model.get_server_by_key(server_key)
    data = {'last_check': now - 61}
    server_model.update(data, server['_id'])
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == True

    # Too recent even with the 15s buffer -> rejected
    last_check_plus_buffer = now - 54 + buffer_period
    data = {'last_check': last_check_plus_buffer}
    server_model.update(data, server['_id'])
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == False

    self._cleanup()

    # 300s interval elapsed -> allowed
    server_key = server_model.add('test_name', account_id=1, check_every=300)
    server = server_model.get_server_by_key(server_key)
    data = {'last_check': now - 301}
    server_model.update(data, server['_id'])
    server = server_model.get_server_by_key(server_key)
    result = throttle_status(server=server)
    assert result.allow == True

    self._cleanup()
def processes(self):
    """Collect stats for each monitored process, each entry stamped
    with the same collection time."""
    timestamp = unix_utc_now()
    results = {}
    for name in self.process_checks:
        info = process_info_collector.check_process(name)
        info['time'] = timestamp
        results[name] = info
    return results
def save(self):
    """Create an invitation record for the submitted email address.

    The invitation code is a sha224 hex digest of inviter id + email +
    send time. Returns the stored data dict.
    """
    invite_email = self.cleaned_data['email']

    data = {
        'email': invite_email,
        'invited_by': self.user.id,
        'sent': unix_utc_now(),
    }

    raw_code = "{0}{1}{2}".format(self.user.id, invite_email, unix_utc_now())
    data['invitation_code'] = hashlib.sha224(raw_code.encode()).hexdigest()

    invite_model.create_invitation(data=data)
    return data
def processes(self):
    """Return per-process stats from the process collector, each entry
    stamped with the collection time."""
    now = unix_utc_now()
    process_info_dict = {}
    for process in self.process_checks:
        process_info_dict[process] = process_info_collector.check_process(
            process)
        process_info_dict[process]['time'] = now
    return process_info_dict
def save_uptime_occurence(self, alert, data=None):
    """Record an uptime alert trigger in the alert history.

    :param alert: alert document; must contain the triggering server
    :param data: optional pre-built history payload; defaults to a
        "triggered now" entry. (Previously this parameter was accepted
        but silently overwritten.)
    """
    if data is None:
        data = {"value": 1, "time": unix_utc_now(), "trigger": True}
    server_id = alert['server']['_id']
    self.alert_history_model.save(alert=alert, server_id=server_id, data=data)
def save_system_occurence(self, alerts, server_id=None): server = server_model.get_by_id(server_id) # Format: {'cpu': [{'value': 2.6899999999999977, 'rule': '4f55da92925d75158d0001e0'}}]} for key, values_list in alerts.items(): for value in values_list: data = {} save_to_db = False alert_on = value.get('value', None) trigger = value.get('trigger', False) rule_id = value.get('rule', None) time = value.get('time', None) if time is None: time = unix_utc_now() alert_on = "{0:.2f}".format(float(alert_on)) alert = self.get_by_id(rule_id) data = { "value": float(alert_on), "time": time, "trigger": trigger } if key == 'disk': volume = value.get('volume', None) volume_data = volumes_model.get_by_name(server, volume) if volume_data: data['volume'] = volume_data.get('_id', None) if key == 'network': interface = value.get('interface', None) interface_data = interfaces_model.get_by_name( server, interface) if interface_data: data['interface'] = interface_data.get('_id', None) server_id = self.mongo.get_object_id(server_id) # Check for tagged global alerts alert_server = alert.get('server') if alert_server == 'all': if self._server_tags_in_alert(server=server, alert=alert): save_to_db = True else: save_to_db = True if save_to_db: self.alert_history_model.save(alert=alert, server_id=server_id, data=data)
def save(self):
    """Create an invitation record for the email in the cleaned form data.

    The invitation code is a sha224 hex digest of inviter id + email +
    send time. Returns the stored data dict.
    """
    cleaned_data = self.cleaned_data
    new_invite_email = cleaned_data['email']

    data = {
        'email': new_invite_email,
        'invited_by': self.user.id,
        'sent': unix_utc_now()
    }

    invitation_code_string = "{0}{1}{2}".format(self.user.id, new_invite_email, unix_utc_now())
    # sha224 requires bytes input
    encoded_invitation_code = invitation_code_string.encode()
    data['invitation_code'] = hashlib.sha224(
        encoded_invitation_code).hexdigest()

    invite_model.create_invitation(data=data)
    return data
def post(self, request, server_key):
    """Ingest a collectd payload and store gauge values per plugin.

    Only 'gauge' datasets are stored; the 'irq' plugin is skipped. Data
    expires after the server's `keep_data` retention window.
    """
    plugin_dict = {}
    date_now = datetime.utcnow()
    time_now = unix_utc_now()
    ignored_plugins = ['irq']
    accepted_types = [
        'gauge',
    ]
    status = settings.API_RESULTS['not-found']
    data = request.data

    if request.server:
        server = request.server
        expires_days = server.get('keep_data', 30)
        expires_at = date_now + timedelta(days=expires_days)

        for p in data:
            plugin_name = p.get('plugin')
            plugin_instance = p.get('plugin_instance')
            dsnames = p.get('dsnames')
            values = p.get('values')
            dstypes = p.get('dstypes')

            name = "collectd.{0}".format(plugin_name)
            plugin_dict[name] = {}

            for dsn, v, dstype in zip(dsnames, values, dstypes):
                if dstype in accepted_types and plugin_name not in ignored_plugins:
                    value_name = "{0}.{1}".format(
                        plugin_instance, dsn) if plugin_instance else dsn
                    value_name = "{0}.{1}".format(
                        plugin_name, value_name) if dsn == 'value' else value_name
                    plugin_dict[name][value_name] = v

        if len(plugin_dict) > 0:
            # `iteritems()` was removed in Python 3 - use `items()`
            for name, data in plugin_dict.items():
                if len(data) > 0:
                    plugin_data = {'gauges': data, 'counters': {}}
                    plugin_model.save_data(server=server,
                                           name=name,
                                           data=plugin_data,
                                           time=time_now,
                                           expires_at=expires_at)

    return Response({'status': status})
def post(self, request, server_key):
    """Ingest a collectd payload and store gauge values per plugin.

    Only 'gauge' datasets are stored; the 'irq' plugin is skipped. Data
    expires after the server's `keep_data` retention window.
    """
    plugin_dict = {}
    date_now = datetime.utcnow()
    time_now = unix_utc_now()
    ignored_plugins = ['irq']
    accepted_types = ['gauge',]
    status = settings.API_RESULTS['not-found']
    data = request.data

    if request.server:
        server = request.server
        expires_days = server.get('keep_data', 30)
        expires_at = date_now + timedelta(days=expires_days)

        for p in data:
            plugin_name = p.get('plugin')
            plugin_instance = p.get('plugin_instance')
            dsnames = p.get('dsnames')
            values = p.get('values')
            dstypes = p.get('dstypes')

            name = "collectd.{0}".format(plugin_name)
            plugin_dict[name] = {}

            for dsn, v, dstype in zip(dsnames, values, dstypes):
                if dstype in accepted_types and plugin_name not in ignored_plugins:
                    value_name = "{0}.{1}".format(plugin_instance, dsn) if plugin_instance else dsn
                    value_name = "{0}.{1}".format(plugin_name, value_name) if dsn == 'value' else value_name
                    plugin_dict[name][value_name] = v

        if len(plugin_dict) > 0:
            # `iteritems()` was removed in Python 3 - use `items()`
            for name, data in plugin_dict.items():
                if len(data) > 0:
                    plugin_data = {'gauges': data, 'counters': {}}
                    plugin_model.save_data(
                        server=server,
                        name=name,
                        data=plugin_data,
                        time=time_now,
                        expires_at=expires_at
                    )

    return Response({'status': status})
def save(self, data=None, server=None):
    """Persist a batch of health-check results.

    Each check gets a data point that expires after 2 days, and its
    last-result summary is refreshed. Returns the input list with a
    `health_checks_data_id` added to every entry.
    """
    now = unix_utc_now()
    date_now = datetime.utcnow()
    expires_at = date_now + timedelta(days=2)

    for i, check in enumerate(data):
        command = check.get('command')
        check_id = health_checks_model.save(
            server=server,
            command=command
        )
        check_id = self.object_id(check_id)

        exit_codes = {0: "ok", 1: "warning", 2: "critical"}
        try:
            status = exit_codes[check["exit_code"]]
        except (KeyError, TypeError):  # missing or unrecognized exit code
            status = "unknown"

        error = check.get('error')
        output = check.get('output', "").strip()

        params = {
            'check_id': check_id,
            'time': now,
            'output': output,
            'status': status,
            'error': error,
            'expires_at': expires_at,
        }
        health_checks_data_id = self.collection.insert(params)

        self.collection.ensure_index([('expires_at', 1)], expireAfterSeconds=0)
        self.collection.ensure_index([('time', self.desc)])
        self.collection.ensure_index([('check_id', self.desc)])

        last_check = {
            'time': now,
            'output': output,
            'status': status,
            'error': error
        }
        health_checks_model.save_last_result(check_id=check_id, last_check=last_check, timestamp=now)
        data[i]['health_checks_data_id'] = health_checks_data_id

    return data
def save(self, data=None, server=None):
    """Persist a batch of health-check results.

    Each check gets a data point that expires after 2 days, and its
    last-result summary is refreshed. Returns the input list with a
    `health_checks_data_id` added to every entry.
    """
    now = unix_utc_now()
    date_now = datetime.utcnow()
    expires_at = date_now + timedelta(days=2)

    for i, check in enumerate(data):
        command = check.get('command')
        check_id = health_checks_model.save(server=server, command=command)
        check_id = self.object_id(check_id)

        exit_codes = {0: "ok", 1: "warning", 2: "critical"}
        try:
            status = exit_codes[check["exit_code"]]
        except (KeyError, TypeError):  # missing or unrecognized exit code
            status = "unknown"

        error = check.get('error')
        output = check.get('output', "").strip()

        params = {
            'check_id': check_id,
            'time': now,
            'output': output,
            'status': status,
            'error': error,
            'expires_at': expires_at,
        }
        health_checks_data_id = self.collection.insert(params)

        self.collection.ensure_index([('expires_at', 1)], expireAfterSeconds=0)
        self.collection.ensure_index([('time', self.desc)])
        self.collection.ensure_index([('check_id', self.desc)])

        last_check = {
            'time': now,
            'output': output,
            'status': status,
            'error': error
        }
        health_checks_model.save_last_result(check_id=check_id, last_check=last_check, timestamp=now)
        data[i]['health_checks_data_id'] = health_checks_data_id

    return data
def save_error(self, data=None, server=None, plugin=None, time=None):
    """Replace the stored error entry for this plugin/server pair."""
    collection = self._get_error_collection()

    lookup = {'plugin_id': plugin['_id'], 'server_id': server['_id']}
    collection.remove(lookup)

    entry = dict(lookup)
    entry['t'] = time if time else unix_utc_now()
    entry['error'] = data
    collection.insert(entry)

    for field in ('server_id', 'plugin_id', 't'):
        collection.ensure_index([(field, self.desc)], background=True)
def get_active_last_five_minutes(self, account_id=None, count=None):
    """Servers checked within the last five minutes; returns a cursor,
    or the match count when `count` is truthy."""
    query = {"last_check": {"$gte": unix_utc_now() - 300}}
    if account_id:
        query['account_id'] = account_id

    cursor = self.collection.find(query)
    return cursor.count() if count else cursor
def finalize_response(self, request, response, *args, **kwargs):
    """DRF hook: record every API request (caller address, method, path,
    time) before delegating to the normal response finalization."""
    request_data = {
        'remote_address': request.META['REMOTE_ADDR'],
        'request_method': request.method,
        'request_path': request.get_full_path(),
        'time': unix_utc_now()
    }
    api_history_model.add(request_data)
    return super(SaveRequestHistoryMixin, self).finalize_response(request, response, *args, **kwargs)
def check(self):
    """Trigger a NotSendingData occurrence for every server whose last
    check is older than the alert period plus a 10-second buffer."""
    time_now = unix_utc_now()
    alerts = alerts_model.get_alerts_not_sending_data()

    for alert in alerts:
        period = alert.get('period')
        for server in alert.get('server_data'):
            last_check = server.get('last_check')
            # Skip all the servers with no agent installed
            # (`is not None` instead of `!= None` - PEP 8 identity check)
            if last_check is not None:
                since_last_check = time_now - last_check
                # 65 seconds, 60 seconds sleep, 5 seconds to collect
                if since_last_check > (period + 10):  # Trigger alert, add 10 seconds buffer
                    alert['server'] = server
                    alerts_model.save_notsendingdata_occurence(alert=alert)
def __call__(self, *args, **kwargs):
    """Store an incoming exception event, grouping repeats by an md5 of
    class + url + backtrace, and flag the 'exceptions' tab as unread."""
    now = unix_utc_now()
    exception_dict = args[0]

    exception_class = exception_dict.get('exception_class', '')
    url = exception_dict.get('url', '')
    backtrace = exception_dict.get('backtrace', '')
    message = exception_dict.get('message', '')
    enviroment = exception_dict.get('enviroment', '')
    data = exception_dict.get('data', '')

    exception_string = "{0}{1}{2}".format(exception_class, url, backtrace)
    # md5 requires bytes on Python 3 - encode first (matches the sha224
    # usage elsewhere in this codebase)
    exception_id = md5(exception_string.encode()).hexdigest()

    additional_data = {'occurrence': now}
    if message:
        additional_data['message'] = message
    if enviroment:
        additional_data['enviroment'] = enviroment
    if data:
        additional_data['data'] = data

    exception_in_db = self.model.collection.find_one(
        {"exception_id": exception_id})

    if exception_in_db is not None:
        # Repeat occurrence - prepend the newest details, bump the counter
        exception_in_db['last_occurrence'] = now
        exception_in_db['additional_data'].insert(0, additional_data)
        exception_in_db[
            'total_occurrences'] = exception_in_db['total_occurrences'] + 1
        self.model.collection.update({'_id': exception_in_db['_id']}, exception_in_db)
    else:
        entry = {
            'last_occurrence': now,
            'exception_id': exception_id,
            'exception_class': exception_class,
            'url': url,
            'backtrace': backtrace,
        }
        entry['additional_data'] = [additional_data]
        entry['total_occurrences'] = 1
        self.model.save_exception(entry)

    self.common_model.upsert_unread('exceptions')
def view(request):
    """Render the health-checks overview, honoring sort/filter params."""
    now = unix_utc_now()
    sort_by = request.GET.get('sort_by')
    filter_by = request.GET.get('filter_by')

    result = health_checks_model.sort_and_filter(sort_by=sort_by, filter_by=filter_by)

    context = {
        "all_checks": result.all_checks,
        "now": now,
        "sort_by": sort_by,
        "filter_by": filter_by,
        "sorted_result": result.sorted_result,
        "flat_list": result.flat_list,
        "count_statuses": result.count_statuses,
    }
    return render(request, 'healthchecks/view.html', context)
def get_unsent(self, server_id=None):
    """Unsent alert triggers from the last hour, optionally restricted
    to one server. Returns {'data': cursor, 'count': n}."""
    since = unix_utc_now() - 3600
    query = {'sent': False, "time": {"$gte": int(since)}}
    if server_id:
        query['server_id'] = server_id

    cursor = self.collection.find(query)
    return {'data': cursor.clone(), 'count': cursor.count()}
def save_table_data(self, data=None, server=None, plugin=None, find_by_string=None, table_name=None, unique_hash=False):
    """Upsert tabular plugin data into the plugin's table collection,
    one document per row.

    `data` format: {'headers': [...], 'data': [[...], ...]}.

    :param find_by_string: column used as the upsert key
    :param unique_hash: when truthy, key rows on a hash of the
        `find_by_string` value instead (used e.g. for SQL query text)
    """
    if data is None:
        return

    collection = self._get_table_data_collection(plugin=plugin, table_name=table_name)
    params = {'plugin_id': plugin['_id'], 'server_id': server['_id']}

    header_row = data.get('headers')
    table_data = data.get('data')
    if header_row is None or table_data is None:
        return

    for row in data.get('data'):
        result = dict(zip(header_row, row))
        name = result.get(find_by_string)
        update_params = {find_by_string: name}

        # Converts SQL queries to MD5 for faster search and identification
        # NOTE: the `unique_hash` flag is rebound here to the hash value
        # of the last row; the index choice below depends on it
        if unique_hash:
            unique_hash = create_unique_hash(name)
            result['unique_hash'] = unique_hash
            update_params = {"unique_hash": unique_hash}

        result['last_update'] = unix_utc_now()
        result.update(params)
        collection.update(update_params, {"$set": result}, upsert=True)

    # find_by_string is different for different datasets - ns, name, data, table_name, etc
    if unique_hash:
        collection.ensure_index([('unique_hash', self.desc)], background=True)
    else:
        collection.ensure_index([(find_by_string, self.desc)], background=True)

    collection.ensure_index([('last_update', self.desc)], background=True)
    collection.ensure_index([('server_id', self.desc)], background=True)
    collection.ensure_index([('plugin_id', self.desc)], background=True)
def has_permission(self, request, view):
    """Allow access with a valid API key or an authenticated session.

    A matching key also has its `last_used` timestamp refreshed.
    """
    api_key = request.query_params.get('api_key')
    is_valid = False

    params = {'key': api_key}
    # Guard against a missing/unknown key: get_one may return None,
    # which previously crashed on `.get(...)`
    result = api_key_model.get_one(params=params) or {}

    key = result.get('key', False)
    if key is not False:
        is_valid = True
        api_key_model.update({'last_used': unix_utc_now()}, result['_id'])

    if request.user.is_authenticated():
        is_valid = True

    return is_valid
def has_permission(self, request, view):
    """Allow access with a valid API key or an authenticated session.

    A matching key also has its `last_used` timestamp refreshed.
    """
    api_key = request.query_params.get('api_key')
    is_valid = False

    params = {'key': api_key}
    # Guard against a missing/unknown key: get_one may return None,
    # which previously crashed on `.get(...)`
    result = api_key_model.get_one(params=params) or {}

    key = result.get('key', False)
    if key is not False:
        is_valid = True
        api_key_model.update({'last_used': unix_utc_now()}, result['_id'])

    if request.user.is_authenticated:
        is_valid = True

    return is_valid
def check(self):
    """Trigger a NotSendingData occurrence for every server whose last
    check is older than the alert period plus a 10-second buffer."""
    time_now = unix_utc_now()
    alerts = alerts_model.get_alerts_not_sending_data()

    for alert in alerts:
        period = alert.get('period')
        for server in alert.get('server_data'):
            last_check = server.get('last_check')
            # Skip all the servers with no agent installed
            # (`is not None` instead of `!= None` - PEP 8 identity check)
            if last_check is not None:
                since_last_check = time_now - last_check
                # 65 seconds, 60 seconds sleep, 5 seconds to collect
                if since_last_check > (
                        period + 10):  # Trigger alert, add 10 seconds buffer
                    alert['server'] = server
                    alerts_model.save_notsendingdata_occurence(alert=alert)
def __call__(self, *args, **kwargs):
    """Store an incoming exception event, grouping repeats by an md5 of
    class + url + backtrace, and flag the 'exceptions' tab as unread."""
    now = unix_utc_now()
    exception_dict = args[0]

    exception_class = exception_dict.get('exception_class', '')
    url = exception_dict.get('url', '')
    backtrace = exception_dict.get('backtrace', '')
    message = exception_dict.get('message', '')
    enviroment = exception_dict.get('enviroment', '')
    data = exception_dict.get('data', '')

    exception_string = "{0}{1}{2}".format(exception_class, url, backtrace)
    # md5 requires bytes on Python 3 - encode first (matches the sha224
    # usage elsewhere in this codebase)
    exception_id = md5(exception_string.encode()).hexdigest()

    additional_data = {'occurrence': now}
    if message:
        additional_data['message'] = message
    if enviroment:
        additional_data['enviroment'] = enviroment
    if data:
        additional_data['data'] = data

    exception_in_db = self.model.collection.find_one({"exception_id": exception_id})

    if exception_in_db is not None:
        # Repeat occurrence - prepend the newest details, bump the counter
        exception_in_db['last_occurrence'] = now
        exception_in_db['additional_data'].insert(0, additional_data)
        exception_in_db['total_occurrences'] = exception_in_db['total_occurrences'] + 1
        self.model.collection.update({'_id': exception_in_db['_id']}, exception_in_db)
    else:
        entry = {
            'last_occurrence': now,
            'exception_id': exception_id,
            'exception_class': exception_class,
            'url': url,
            'backtrace': backtrace,
        }
        entry['additional_data'] = [additional_data]
        entry['total_occurrences'] = 1
        self.model.save_exception(entry)

    self.common_model.upsert_unread('exceptions')
def system(self):
    """Run every active system collector and return the results keyed
    by check name, each stamped with the same collection time.

    Collectors return False on failure - the `!= False` checks keep
    failed collections out of the result.
    """
    system_info_dict = {}
    now = unix_utc_now()  # unix time

    if 'memory' in self.active_checks:
        memory = system_info_collector.get_memory_info()
        if memory != False:
            memory['time'] = now
            system_info_dict['memory'] = memory

    if 'cpu' in self.active_checks:
        cpu = system_info_collector.get_cpu_utilization()
        if cpu != False:
            cpu['time'] = now
            system_info_dict['cpu'] = cpu

    if 'loadavg' in self.active_checks:
        loadavg = system_info_collector.get_load_average()
        if loadavg != False:
            loadavg['time'] = now
            system_info_dict['loadavg'] = loadavg

    if 'disk' in self.active_checks:
        disk = system_info_collector.get_disk_usage()
        if disk != False:
            disk['time'] = now
            system_info_dict['disk'] = disk

    # Network stats are not collected on OS X
    if 'network' in self.active_checks and sys.platform != 'darwin':
        network = system_info_collector.get_network_traffic()
        if network != False:
            network['time'] = now
            system_info_dict['network'] = network

    return system_info_dict
def view(request):
    """Render the health-checks overview page, honoring the optional
    sort_by / filter_by query parameters."""
    now = unix_utc_now()
    sort_by = request.GET.get('sort_by')
    filter_by = request.GET.get('filter_by')

    result = health_checks_model.sort_and_filter(sort_by=sort_by, filter_by=filter_by)

    return render(
        request, 'healthchecks/view.html', {
            "all_checks": result.all_checks,
            "now": now,
            "sort_by": sort_by,
            "filter_by": filter_by,
            "sorted_result": result.sorted_result,
            "flat_list": result.flat_list,
            "count_statuses": result.count_statuses,
        })
def save_counters(self, data=None, server=None, plugin=None, time=None):
    """Replace the stored counter snapshot for this plugin/server pair."""
    if data is None:
        return

    tree = flat_to_tree_dict_helper(data)
    collection = self._get_counters_collection()

    if len(tree) > 0:
        collection.remove({'plugin_id': plugin['_id'], 'server_id': server['_id']})

        tree['plugin_id'] = plugin['_id']
        tree['server_id'] = server['_id']
        tree['t'] = time if time else unix_utc_now()
        collection.insert(tree)

        collection.ensure_index([('server_id', self.desc)], background=True)
        collection.ensure_index([('plugin_id', self.desc)], background=True)
def throttle_status(server=None):
    """Decide whether incoming agent data should be accepted.

    Allowed when the server was never checked, or when at least
    `check_every` seconds (minus a 15s statsd buffer) have elapsed
    since the last check.
    """
    result = AmonStruct()
    result.allow = False

    last_check = server.get('last_check')
    check_period = server.get('check_every', 60)

    if not last_check:
        # Never checked - always accept the first payload
        result.allow = True
        return result

    # Add 15 seconds buffer, for statsd
    elapsed = unix_utc_now() - last_check + 15
    if elapsed >= check_period:
        result.allow = True

    return result
def save_occurence(self, alert, server_id=None):
    """Record a triggered alert value in the alert history.

    The value is rounded to two decimals. A `time` supplied in the
    trigger payload (used by the test suite) takes precedence over now.
    Without a server_id the entry is saved as a global occurrence.
    """
    alert_id = alert.get('alert_id')
    alert_on = alert.get('value', None)
    trigger = alert.get('trigger', False)
    alert_on = "{0:.2f}".format(float(alert_on))
    time = alert.get('time', None)
    if time is None:
        time = unix_utc_now()

    data = {"value": float(alert_on), "time": time, "trigger": trigger}
    alert = self.get_by_id(alert_id)

    # Global alerts here
    if server_id:
        self.alert_history_model.save(alert=alert, server_id=server_id, data=data)
    else:
        self.alert_history_model.save(alert=alert, data=data)
def save_healtcheck_occurence(self, trigger=None, server_id=None):
    """Record a health-check alert trigger in the alert history, keeping
    a reference to the underlying check-result document."""
    time = trigger.get('time', None)
    alert_id = trigger.get('alert_id')
    trigger_state = trigger.get('trigger', False)
    health_checks_data_id = trigger.get("health_checks_data_id")

    # For the test suite, add an option to overwrite time
    if time is None:
        time = unix_utc_now()

    data = {
        "value": 1,
        "time": time,
        "trigger": trigger_state,
        "health_checks_data_id": health_checks_data_id  # Save a reference to the actual result
    }

    alert = self.get_by_id(alert_id)
    self.alert_history_model.save(alert=alert, server_id=server_id, data=data)
def test_mute(self):
    """Muting all servers stores an expiry timestamp; period=0 means an
    indefinite mute (no expiry field stored)."""
    self._cleanup()
    url = reverse('alerts_mute_servers')

    # period=1 (hour) -> expires one hour from now.
    # NOTE(review): the exact equality against unix_utc_now() could flake
    # if the request takes longer than a second - confirm acceptable.
    response = self.c.post(url, {
        'server': 'all',
        'period': 1,
    })
    result = alert_mute_servers_model.collection.find_one()
    assert result['expires_at_utc'] == unix_utc_now() + 3600

    self._cleanup()

    # period=0 -> indefinite mute, no expiry stored
    response = self.c.post(url, {
        'server': 'all',
        'period': 0,
    })
    result = alert_mute_servers_model.collection.find_one()
    assert result.get('expires_at_utc') == None
def update_last_sync(self, credentials_id=None):
    """Stamp the credentials document with the current sync time."""
    object_id = self.object_id(credentials_id)
    self.collection.update(
        {"_id": object_id},
        {"$set": {'last_sync': unix_utc_now()}},
        upsert=True,
    )
def save_data(self, server=None, data=None, time=None, expires_at=None):
    """Store one process snapshot for a server.

    Normalizes list payloads (new agent) to a name-keyed dict, splits
    processes into user vs ignored, records per-process rows for
    everything using >2 combined cpu+memory, computes top-3 consumers,
    and replaces the ignored-processes snapshot. Returns the inserted
    summary document.
    """
    time = time if time else unix_utc_now()

    if type(data) == list:
        formated_data = {}
        # New agent, format data
        # the legacy format will be removed in the future and this is going to be the default
        for process in data:
            name = process.get('name')
            # Check if this already exists
            exists = formated_data.get(name, False)
            if exists:
                existing_memory = formated_data[name].get('memory_mb')
                current_iteration_memory = process.get('memory_mb')
                # Overwrite - keep the heavier instance of a duplicate name
                if existing_memory < current_iteration_memory:
                    formated_data[name] = process
            else:
                formated_data[name] = process
        data = formated_data

    ignored_cpu_value = float(0)
    ignored_memory = float(0)
    ignored_processes = []
    process_data = []
    ignored_data = {}
    total_processes = len(data.items())
    total_cpu = 0
    total_memory = 0

    # Map known process names to their document ids for this server
    all_processes = self.get_all_for_server(server['_id'])
    all_processes_dict = {}
    for p in all_processes:
        process_name = p.get('name', None)
        if process_name:
            all_processes_dict[process_name] = p.get('_id')

    # {'process': {'cpu': 10, 'memory_mb': 10, 'kb_read': 10}}
    for name, value in data.items():
        cpu_value = self.format_float(value['cpu'])
        memory_value = self.format_float(value['memory_mb'])
        try:
            read_value = self.format_float(value['kb_read'])
            write_value = self.format_float(value['kb_write'])
        except:
            # Older agents may not report IO counters
            read_value = 0
            write_value = 0

        process_id = all_processes_dict.get(name)
        if process_id == None:
            if self.is_ignored(name):
                ignored_cpu_value = ignored_cpu_value+cpu_value
                ignored_memory = ignored_memory+memory_value
                ignored_data = {"name": name, "c": cpu_value, "m": memory_value}
                name = "{0}::{1}::{2}".format(name, cpu_value, memory_value)
                ignored_processes.append(name)
            else:
                # First time we see this process - register it
                process_id = self.collection.insert({'server': server['_id'], "name": name })

        # Only store per-process rows for non-trivial usage (>2 cpu+mem)
        if process_id and (cpu_value+memory_value) > 2:
            process_dict = {
                "n": name,
                "p": process_id,
                "c": cpu_value,
                "m": memory_value,
                "r": read_value,
                "w": write_value
            }
            process_data.append(process_dict)
            self.update({'last_check': time}, process_id)
            self.collection.ensure_index([('last_check', self.asc)], background=True)

        if (ignored_memory+ignored_cpu_value) > 0:
            # Aggregate totals for everything on the ignore list
            ignored_data = {
                'm': self.format_float(ignored_memory),
                'c': self.format_float(ignored_cpu_value)
            }

        total_memory = total_memory+memory_value
        total_cpu = total_cpu+cpu_value

    user_processes = int(total_processes-len(ignored_processes))

    process_data_by_cpu = sorted(process_data, key=lambda k: k['c'], reverse=True)
    process_data_by_memory = sorted(process_data, key=lambda k: k['m'], reverse=True)

    # Top-3 consumers (or everything when fewer than 3 rows)
    if len(process_data_by_cpu) < 3:
        top_cpu = process_data_by_cpu[0:]
        top_memory = process_data_by_memory[0:]
    else:
        top_cpu = process_data_by_cpu[0:3]
        top_memory = process_data_by_memory[0:3]

    process_data_dict = {
        "t": time,
        "server_id": server['_id'],
        "data": process_data,
        "expires_at": expires_at,
        "total_processes": total_processes,
        "user_processes": user_processes,
        "total_cpu": self.format_float(total_cpu),
        "total_memory": self.format_float(total_memory),
        "top_cpu": top_cpu,
        "top_memory": top_memory,
    }

    self.data_collection.insert(process_data_dict)
    self.data_collection.ensure_index([('t', self.asc)], background=True)
    self.data_collection.ensure_index([('server_id', self.asc)], background=True)
    self.data_collection.ensure_index([('expires_at', 1)], expireAfterSeconds=0)

    # Delete old data - keep only the latest ignored-processes snapshot
    self.ignored_processes_collection.remove({'t': {"$lt": time}, 'server_id': server['_id']})
    self.ignored_processes_collection.insert({
        "t": time,
        "server_id": server['_id'],
        "total_cpu": ignored_data.get('c'),
        "total_memory": ignored_data.get('m'),
        "data": sorted(ignored_processes),
    })
    self.ignored_processes_collection.ensure_index([('t', self.asc)], background=True)
    self.ignored_processes_collection.ensure_index([('server_id', self.asc)], background=True)

    return process_data_dict
def add(self, data=None):
    """Insert a new document stamped with the current UTC unix time.

    The caller-supplied ``data`` dict is mutated in place: a ``created``
    key is set before the insert. A descending background index on
    ``created`` is (re)ensured after every insert.

    NOTE(review): ``data=None`` is the declared default, but a ``None``
    value would raise ``TypeError`` on the first assignment — callers are
    expected to always pass a dict.
    """
    data['created'] = unix_utc_now()
    self.collection.insert(data)
    self.collection.ensure_index([('created', self.desc)], background=True)
def save_data_to_backend(self, data=None, server=None):
    """Fan one agent payload out to the per-category models and alerters.

    ``data`` is the raw payload dict; each recognized key ('system',
    'processes', 'plugins', 'checks', 'series') is saved through its model
    with a shared timestamp and expiry, then handed to the matching
    alerter(s). Does nothing when ``server`` is None.
    """
    if server is None:
        return

    time_now = unix_utc_now()
    date_now = datetime.utcnow()

    # Retention window: per-server 'keep_data' (default 30 days),
    # overridden globally by settings.KEEP_DATA when set.
    expires_days = server.get('keep_data', 30)
    if settings.KEEP_DATA is not None:
        expires_days = settings.KEEP_DATA
    expires_at = date_now + timedelta(days=expires_days)

    system_data = data.get('system')
    process_data = data.get('processes')
    plugin_data = data.get('plugins')
    checks_data = data.get('checks')
    telegraf_data = data.get('series')

    # Telegraf series are first converted to the native plugin format.
    if telegraf_data:
        formated_data = plugin_model.format_telegraf_to_amon(data=telegraf_data)
        if len(formated_data) > 0:
            for name, d in formated_data.items():
                plugin = plugin_model.save_data(
                    server=server,
                    name=name,
                    data=d,
                    time=time_now,
                    expires_at=expires_at
                )

    if system_data:
        # save_data gets a copy; the alerter still sees the original dict.
        system_model.save_data(
            server=server,
            data=system_data.copy(),
            time=time_now,
            expires_at=expires_at
        )
        server_alerter.check(data=system_data, server=server)

    if process_data:
        # NOTE: rebinds `data` — the alerters receive the *formatted*
        # result of save_data, not the raw payload.
        data = process_model.save_data(
            server=server,
            data=process_data,
            time=time_now,
            expires_at=expires_at
        )
        process_alerter.check(data=data, server=server)
        uptime_alerter.check(data=data, server=server)

    if plugin_data:
        formated_data = plugin_model.flatten_plugin_data(data=plugin_data)
        for name, data in formated_data.items():
            plugin = plugin_model.save_data(
                server=server,
                name=name,
                data=data,
                time=time_now,
                expires_at=expires_at
            )
            plugin_alerter.check(data=data, plugin=plugin, server=server)

    if checks_data:
        formated_check_data = health_checks_results_model.save(data=checks_data, server=server)
        health_check_alerter.check(data=formated_check_data, server=server)
# NOTE(review): original indentation has been lost (the method is collapsed
# onto two physical lines); the flattened text admits more than one valid
# nesting for parts of the if/elif chain (notably which level the server-name
# prefix line sits at), so the code is kept byte-identical and only comments
# are added. TODO: restore formatting from version control.
#
# Build the list of metric descriptors for one dashboard, resolving each
# stored metric against its related entities (server, process, plugin/gauge,
# volume, interface, health check), attaching a display name, unit, and
# detail URL (public vs. private route), and returning the list sorted by
# each metric's 'order' field. Metrics that match no branch are dropped
# (`append` stays False). `account_id` is accepted but unused here.
def get_all(self, account_id=None, dashboard_id=None, public=None): result_list = [] query = [] params = {'dashboard_id': dashboard_id} params = self.keys_to_mongoid(data=params, keys=['dashboard_id']) if dashboard_id: query = super(DashboardMetricsModel, self).get(params=params) utc_now = unix_utc_now() for metric in query: mongo_id = metric.get('_id') server_id = metric.get('server_id') metric_type = metric.get('metric_type') unique_id = metric.get('unique_id') check = metric.get('check') order = metric.get('order', 0) tags = metric.get('tags', []) tags_list = tags_model.get_list_of_tags(tags_list=tags, to_dict=True) server = server_model.get_by_id(server_id) process = process_model.get_by_id(metric.get('process_id')) plugin = plugin_model.get_by_id(metric.get('plugin_id')) gauge = plugin_model.get_gauge_by_id(gauge_id=metric.get('gauge_id')) volume = volumes_model.get_by_id(metric.get('device_id')) interface = interfaces_model.get_by_id(metric.get('device_id')) healthcheck_metric = health_checks_model.get_by_id(metric.get('healthcheck_id')) append = False unit = yaxis(check) if metric_type == 'system_global' and check == 'memory': unit = "%" if metric_type == 'system_global' and check == 'disk': unit = '%' if public: url = reverse('public_dashboard_metric', kwargs={"metric_id": mongo_id}) else: url = reverse('dashboard_metric', kwargs={"metric_id": mongo_id}) result = { 'id': mongo_id, 'unique_id': unique_id, 'metric_type': metric_type, 'url': url, 'utcnow': utc_now, 'name': '', 'unit': unit, 'tags': tags_list, 'order': order } if server: result.update({'server_id': server_id, 'type': 'server_metric','server_name' :server.get('name')}) if metric_type == 'system': result['name'] = "{0}".format(check) if volume: result['name'] = u"{0}.{1}".format(result['name'], volume['name']) if interface: result['name'] = u"{0}.{1}".format(result['name'], interface['name']) append = True elif metric_type == 'process' and process: process_name = process.get('name') 
# (continuation) Remaining branches: process / plugin metrics (prefixed with
# the server name), health checks (internal mongo fields stripped from the
# embedded document, best-effort via try/except), the generic key-based
# fallback, and the 'plugin_global' name override; then ObjectIds are
# stringified and the surviving results are sorted by 'order'.
result['name'] = u"{0}.{1}".format(process_name, check) result['process_id'] = process['_id'] append = True elif metric_type == 'plugin' and plugin and gauge: result['name'] = u"{0}.{1}".format(plugin.get('name'), gauge.get('name')) result['plugin_id'] = plugin['_id'] result['gauge_id'] = gauge['_id'] append = True result['name'] = u"{0}.{1}".format(server.get('name'), result['name']) elif healthcheck_metric: result['healthcheck'] = healthcheck_metric result['healthcheck_id'] = healthcheck_metric.get('_id') try: del result['healthcheck']['_id'] del result['healthcheck']['server_id'] del result['healthcheck']['tags'] del result['healthcheck']['file_id'] # Custom scripts except: pass result['type'] = 'healthcheck' append = True else: key = metric.get('key') # Overwrite keys for better visual presentation if check == 'network': key = 'inbound' if key == 'i' else 'outbound' result['name'] = u"{0}.{1}".format(check, key) append = True if metric_type == 'plugin_global': result['name'] = u'{0}.{1}.{2}'.format(metric.get('plugin'), metric.get('gauge'), metric.get('key')) append = True result = self.mongoid_to_str(result, ['server_id', 'id', 'process_id', 'plugin_id', 'metric_id', 'gauge_id', 'healthcheck_id',]) if append: result_list.append(result) from operator import itemgetter sorted_list = sorted(result_list, key=itemgetter('order')) return sorted_list