if 'notificationChannelIds' in a:
    # Iterate over a copy so we can safely drop IDs from the original list.
    for channel_id in list(a['notificationChannelIds']):
        if channel_id not in existing_notification_channel_ids:
            print('Notification Channel ID ' + str(channel_id) + ' referenced in Alert "' +
                  a['name'] + '" does not exist.\n Restoring without this ID.')
            a['notificationChannelIds'].remove(channel_id)

# The Create/Update APIs will validate but actually ignore these fields;
# to avoid problems, don't submit them in the API request.
for timefield in ['createdOn', 'modifiedOn']:
    del a[timefield]

# NOTE: when exporting alerts that contain deprecated metrics you will
# need to remove them from the source json
# (see https://sysdigdocs.atlassian.net/wiki/spaces/Monitor/pages/205684810/Metrics#Metrics-HeuristicandDeprecatedMetrics)
if a['name'] in existing_alerts:
    a['id'] = existing_alerts[a['name']]['id']
    a['version'] = existing_alerts[a['name']]['version']
    ok, res = sdclient.update_alert(a)
    updated_count += 1
else:
    ok, res = sdclient.create_alert(alert_obj=a)
    created_count += 1

if not ok:
    print(res)
    sys.exit(1)

print('All Alerts in ' + alerts_dump_file + ' restored successfully (' +
      str(created_count) + ' created, ' + str(updated_count) + ' updated)')
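#
# A minimal sketch (not part of the original script) of how the data used above
# might be prepared: existing_alerts maps alert names to the alerts already in
# the account, and the alerts to restore come from the JSON dump file. The
# get_alerts() call appears elsewhere in these examples; the dump-file layout
# (a top-level 'alerts' key) is an assumption, and building
# existing_notification_channel_ids is omitted here.
#
import json
import sys

ok, res = sdclient.get_alerts()
if not ok:
    print(res)
    sys.exit(1)
existing_alerts = {alert['name']: alert for alert in res['alerts']}

with open(alerts_dump_file, 'r') as f:
    alerts_to_restore = json.load(f).get('alerts', [])  # assumed dump layout

# The restore block above would typically run once per alert, e.g.:
#     for a in alerts_to_restore:
#         ...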
ok, res = sdclient.get_notification_ids(notify_channels)
if not ok:
    print("Could not get IDs and hence not creating the alert: " + res)
    sys.exit(-1)
notification_channel_ids = res

#
# Create the alert.
#
ok, res = sdclient.create_alert(
    alert_name,                    # Alert name.
    'this alert was automatically created using the python Sysdig Cloud library',  # Alert description.
    6,                             # Syslog-encoded severity. 6 means 'info'.
    60,                            # The alert will fire if the condition is met for at least 60 seconds.
    'avg(cpu.used.percent) > 80',  # The condition.
    ['host.mac', 'proc.name'],     # Segmentation. We want to check this metric for every process on every machine.
    'ANY',                         # In case there is more than one tomcat process, this alert will fire when a single one of them crosses the 80% threshold.
    'proc.name = "tomcat"',        # Filter. We want to receive a notification only if the name of the process meeting the condition is 'tomcat'.
    notification_channel_ids,
    False)                         # This alert will be disabled when it's created.

#
# Validate and print the results.
#
print(res)
if not ok:
    sys.exit(1)
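#
# A possible definition of the inputs used above (a sketch, not part of the
# original snippet). The SNS channel shape and the EMAIL-by-name lookup follow
# the other examples in this section; the concrete channel name, topic ARN and
# alert name are made up for illustration.
#
alert_name = 'tomcat cpu > 80% on any host'
notify_channels = [{'type': 'EMAIL', 'name': 'Ops team email'},
                   {'type': 'SNS', 'snsTopicARNs': ['arn:aws:sns:us-east-1:273107874544:alarms-stg']}]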
                   {'type': 'SNS', 'snsTopicARNs': ['arn:aws:sns:us-east-1:273107874544:alarms-stg']}]

res = sdclient.get_notification_ids(notify_channels)
if not res[0]:
    print("Could not get IDs and hence not creating the alert: " + res[1])
    sys.exit(-1)
notification_channel_ids = res[1]

#
# Create the alert.
#
res = sdclient.create_alert(
    'tomcat cpu > 80% on any host',  # Alert name.
    'this alert was automatically created using the python Sysdig Cloud library',  # Alert description.
    6,                               # Syslog-encoded severity. 6 means 'info'.
    60,                              # The alert will fire if the condition is met for at least 60 seconds.
    'avg(cpu.used.percent) > 80',    # The condition.
    ['host.mac', 'proc.name'],       # Segmentation. We want to check this metric for every process on every machine.
    'ANY',                           # In case there is more than one tomcat process, this alert will fire when a single one of them crosses the 80% threshold.
    'proc.name = "tomcat"',          # Filter. We want to receive a notification only if the name of the process meeting the condition is 'tomcat'.
    notification_channel_ids,
    False)                           # This alert will be disabled when it's created.

#
# Validate and print the results.
#
print(res[1])
if not res[0]:
    sys.exit(1)
def parse(self, objdata):
    user_id_map = {}

    ###################################################################
    # TEAM CREATION
    ###################################################################
    obj_name = objdata['metadata']['name']
    team_members = objdata['metadata']['annotations'].get('sysdigTeamMembers', '').split(',')
    trecipients = objdata['metadata']['annotations'].get('sysdigAlertEmails', '').split(',')
    tdashboards = objdata['metadata']['annotations'].get('sysdigDashboards', '').split(',')
    alertsj = objdata['metadata']['annotations'].get('sysdigAlerts', json.dumps([]))

    if self._type == 'deployment' or self._type == 'service':
        ns_name = objdata['metadata']['namespace']
        team_name = "%s%s_%s_%s" % (self._team_prefix, self._type, ns_name, obj_name)
    elif self._type == 'namespace':
        ns_name = objdata['metadata']['name']
        team_name = "%s%s_%s" % (self._team_prefix, self._type, ns_name)
    else:
        Logger.log('unrecognized type argument', 'error')
        return False

    #
    # Resolve the user emails.
    # Add the users that are not part of Sysdig Cloud yet.
    #
    for o in team_members:
        uname = o.strip()
        res = self._customer_admin_sdclient.get_user(uname)
        if res[0] == False:
            if res[1] == USER_NOT_FOUND_ERR:
                Logger.log("adding user " + uname)
                res = self._customer_admin_sdclient.create_user_invite(uname)
                res = self._customer_admin_sdclient.get_user(uname)
                Logger.log("User added")
                if res[0] == False:
                    Logger.log('cannot get user %s: %s' % (uname, res[1]), 'error')
                    continue
            else:
                Logger.log('cannot get user %s: %s' % (uname, res[1]), 'error')
                continue

        user_id_map[uname] = res[1]['id']

    if len(user_id_map) == 0:
        Logger.log('No users specified for this team. Skipping.', 'error')
        return False

    #
    # Normalize alert recipients
    #
    recipients = []
    for r in trecipients:
        recipients.append(r.strip())

    #
    # Normalize the dashboards list
    #
    dashboards = []
    for d in tdashboards:
        dashboards.append(d.strip())

    #
    # Parse the alerts json
    #
    alerts = []
    try:
        alerts = json.loads(alertsj)
    except ValueError:
        Logger.log('Invalid JSON in the "alerts" field', 'error')
        return False

    # XXX This is here for testing purposes only
    # res = self._customer_admin_sdclient.delete_team(team_name)

    #
    # Check the existence of the team and create it if it doesn't exist
    #
    team_exists = True
    res = self._customer_admin_sdclient.get_team(team_name)
    if res[0] == False:
        if res[1] == TEAM_NOT_EXISTING_ERR:
            team_exists = False
        new_memberships = dict(map(lambda u: (u, 'ROLE_TEAM_EDIT'), user_id_map.keys()))
    else:
        teaminfo = res[1]
        teamid = teaminfo['id']
        old_memberships = dict(map(lambda m: (m['userId'], m['role']), teaminfo['userRoles']))
        new_memberships = dict(map(lambda u: (u, 'ROLE_TEAM_EDIT') if user_id_map[u] not in old_memberships
                                   else (u, old_memberships[user_id_map[u]]),
                                   user_id_map.keys()))

    if team_exists:
        # Team exists. Detect if there are users to add and edit the team users list.
        newusers = []
        team_uids = set(old_memberships.keys())
        if team_uids != set(user_id_map.values()):
            Logger.log("Detected modified %s %s, editing team %s" % (self._type, obj_name, team_name))
            # Collect the users that are not yet members of the team.
            newusers = [u for u in user_id_map.keys() if user_id_map[u] not in team_uids]
            res = self._customer_admin_sdclient.edit_team(team_name, memberships=new_memberships)
            if res[0] == False:
                Logger.log('Team editing failed: ' + res[1], 'error')
                return False
    else:
        Logger.log("Detected new %s %s, adding team %s" % (self._type, obj_name, team_name))
        # Team doesn't exist. Try to create it.
        if self._type == 'deployment':
            flt = 'kubernetes.namespace.name = "%s" and kubernetes.deployment.name = "%s"' % (ns_name, obj_name)
        elif self._type == 'service':
            flt = 'kubernetes.namespace.name = "%s" and kubernetes.service.name = "%s"' % (ns_name, obj_name)
        elif self._type == 'namespace':
            flt = 'kubernetes.namespace.name = "%s"' % ns_name

        desc = 'automatically generated team based on deployment annotations'
        res = self._customer_admin_sdclient.create_team(team_name, filter=flt, description=desc,
                                                        show='container', memberships=new_memberships)
        if res[0] == False:
            Logger.log('Team creation failed: ' + res[1], 'error')
            return False
        teamid = res[1]['team']['id']
        newusers = user_id_map.keys()

    ###################################################################
    # TEAM CONFIGURATION
    ###################################################################

    #
    # If we have alerts, create a notification channel and point the
    # alerts at it.
    #
    if alerts:
        Logger.log('adding notification recipients')

        #
        # These steps can be done as the admin user, since notification
        # channels have global scope, alerts have team scope, and admin
        # users are members of all teams.
        #
        res = self._customer_admin_sdclient.get_user_api_token(self._customer_id, team_name)
        if res[0] == False:
            Logger.log('Can\'t fetch token for user ' + self._customer_id, 'error')
            return False
        else:
            utoken_t = res[1]

        teamclient = SdcClient(utoken_t, self._sdc_url)

        #
        # Add the email notification channel. This will silently fail
        # if it has already been created.
        #
        res = teamclient.create_email_notification_channel(team_name, recipients)
        if not res[0]:
            if res[1][:20] != EXISTING_CHANNEL_ERR:
                Logger.log('Error setting email recipient: ' + res[1], 'error')
                return False

        #
        # Get the notification channel ID to use for the alerts.
        #
        notify_channels = [{'type': 'EMAIL', 'name': team_name}]
        res = teamclient.get_notification_ids(notify_channels)
        if not res[0]:
            Logger.log("cannot create the email notification channel: " + res[1], 'error')
            return False
        notification_channel_ids = res[1]

        #
        # Make sure the members of the email notification channel are current.
        # Since we searched for the channel by name, there should only be one. But
        # since get_notification_ids() returns a list, treat it as such.
        #
        for channel_id in notification_channel_ids:
            res = teamclient.get_notification_channel(channel_id)
            if not res[0]:
                Logger.log("cannot find the email notification channel: " + res[1], 'error')
                return False
            c = res[1]
            current_recip = c['options']['emailRecipients']
            if set(current_recip) == set(recipients):
                Logger.log('email recipients have not changed since last update', 'info')
            else:
                Logger.log('email recipients have changed - updating', 'info')
                c['options']['emailRecipients'] = copy.deepcopy(recipients)
                teamclient.update_notification_channel(c)

        #
        # Add the Alerts
        #
        res = teamclient.get_alerts()
        if not res[0]:
            Logger.log("cannot get user alerts: " + res[1], 'error')
            return False
        cur_alerts = res[1]['alerts']

        for a in alerts:
            aname = a.get('name', '')

            #
            # Check if this alert already exists
            #
            skip = False
            for ca in cur_alerts:
                if ca['name'] == aname and 'annotations' in ca:
                    skip = True
                    break
            if skip:
                # Alert already exists, skip the creation
                continue

            Logger.log('adding alert %s' % aname)
            res = teamclient.create_alert(
                aname,                        # Alert name.
                a.get('description', ''),     # Alert description.
                a.get('severity', 6),         # Syslog-encoded severity. 6 means 'info'.
                a.get('timespan', 60000000),  # By default, fire if the condition is met for at least 60 seconds.
                a.get('condition', ''),       # The condition.
                a.get('segmentBy', []),            # Segmentation.
                a.get('segmentCondition', 'ANY'),  # Fire when any single segment crosses the threshold.
                a.get('filter', ''),               # Filter limiting the entities the alert applies to.
                notification_channel_ids,
                a.get('enabled', True),
                {'engineTeam': team_name + aname})
            if not res[0]:
                Logger.log('Error creating alert: ' + res[1], 'error')

    #
    # Go through the list of new users and set them up for this team
    #
    for user in user_id_map.keys():
        #
        # First of all, we need to impersonate the users in this team
        # so that we can configure their workplace. This is
        # currently a little bit tricky because it involves:
        # - finding the user token using the admin API
        # - logging in with the new user token
        #
        Logger.log('impersonating user ' + user)
        res = self._customer_admin_sdclient.get_user_api_token(user, team_name)
        if res[0] == False:
            Logger.log('Can\'t fetch token for user ' + user, 'error')
            return False
        else:
            utoken_t = res[1]

        teamclient = SdcClient(utoken_t, self._sdc_url)

        Logger.log('waiting for activation of user ' + user)
        while True:
            res = teamclient.get_user_token()
            if res[0] == True:
                break
            else:
                time.sleep(3)

        #
        # Now that we are in the right user context, we can start to apply the
        # configurations. First of all we set a default kube-friendly grouping
        # hierarchy. We do this only if the user is new to the team, because we
        # don't want to pollute the grouping of existing users.
        #
        if user in newusers:
            Logger.log('setting grouping')
            if self._type == 'service':
                res = teamclient.set_explore_grouping_hierarchy(['kubernetes.namespace.name',
                                                                 'kubernetes.service.name',
                                                                 'kubernetes.pod.name',
                                                                 'container.id'])
            else:
                res = teamclient.set_explore_grouping_hierarchy(['kubernetes.namespace.name',
                                                                 'kubernetes.deployment.name',
                                                                 'kubernetes.pod.name',
                                                                 'container.id'])

            if res[0] == False:
                Logger.log('Failed setting team grouping: ' + res[1], 'error')
                return False

        #
        # Add the dashboards
        #
        res = teamclient.get_dashboards()
        if not res[0]:
            Logger.log('Error getting the dashboards list: ' + res[1], 'error')
            break
        existing_dashboards = res[1]['dashboards']

        for d in dashboards:
            skip = False
            for ex in existing_dashboards:
                if ex['name'] == d:
                    if ex['isShared'] and 'annotations' in ex and ex['annotations'].get('engineTeam') == team_name + d:
                        # Dashboard already exists. Skip adding it.
                        skip = True
                        break
            if skip:
                continue

            Logger.log('adding dashboard ' + d)
            res = teamclient.create_dashboard_from_view(d, d, None, True, {'engineTeam': team_name + d,
                                                                           'ownerUser': user})
            if not res[0]:
                Logger.log('Error creating dashboard: ' + res[1], 'error')
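#
# For reference, a sketch (illustrative values only, not from the original
# code) of the Kubernetes metadata that parse() consumes. The annotation keys
# match the .get() lookups at the top of parse(); the alert fields mirror the
# a.get(...) lookups in the alert-creation loop.
#
import json

example_objdata = {
    'metadata': {
        'name': 'frontend',
        'namespace': 'prod',
        'annotations': {
            'sysdigTeamMembers': 'alice@example.com, bob@example.com',
            'sysdigAlertEmails': 'oncall@example.com',
            'sysdigDashboards': 'Service Overview',
            'sysdigAlerts': json.dumps([{
                'name': 'high cpu',
                'description': 'CPU above threshold',
                'severity': 4,
                'timespan': 60000000,
                'condition': 'avg(cpu.used.percent) > 80',
                'segmentBy': ['kubernetes.pod.name'],
                'segmentCondition': 'ANY',
                'filter': '',
                'enabled': True
            }])
        }
    }
}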
#
# Instantiate the SDC client
#
sdclient = SdcClient(sdc_token)

#
# Create the alert.
#
res = sdclient.create_alert(
    'tomcat cpu > 80% on any host',  # Alert name.
    'this alert was automatically created using the python Sysdig Cloud library',  # Alert description.
    6,                               # Syslog-encoded severity. 6 means 'info'.
    60,                              # The alert will fire if the condition is met for at least 60 seconds.
    'avg(cpu.used.percent) > 80',    # The condition.
    ['host.mac', 'proc.name'],       # Segmentation. We want to check this metric for every process on every machine.
    'ANY',                           # In case there is more than one tomcat process, this alert will fire when a single one of them crosses the 80% threshold.
    'proc.name = "tomcat"',          # Filter. We want to receive a notification only if the name of the process meeting the condition is 'tomcat'.
    ['EMAIL'],                       # Notification target. We want an email to be sent. Alert email recipients can be defined here: https://app.sysdigcloud.com/#/settings/notifications
    False)                           # This alert will be disabled when it's created.

#
# Validate and print the results.
#
print(res[1])
if not res[0]:
    sys.exit(1)
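#
# One possible way (a sketch, not part of the original example) to obtain
# sdc_token before running the snippet above: read it from the command line
# or from an environment variable. The SDC_TOKEN variable name is just an
# illustration.
#
import os
import sys

if len(sys.argv) > 1:
    sdc_token = sys.argv[1]
else:
    sdc_token = os.environ.get('SDC_TOKEN', '')
    if not sdc_token:
        print('usage: %s <sysdig-token>' % sys.argv[0])
        sys.exit(1)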