def update_indicator_values(indicator: Indicator,
                            start: int,
                            end=None,
                            **kwargs):
    """Query SLI values for an indicator and store them.

    Calls ``query_sli`` with the indicator's name and source for the given
    ``start``/``end`` and inserts one ``IndicatorValue`` per returned entry
    inside an ``insert_indicator_values`` child span, then commits the
    session.

    :param indicator: Indicator whose values are queried and stored.
    :param start: Passed through to ``query_sli``.
    :param end: Passed through to ``query_sli``. Default is ``None``.
    :param kwargs: May carry the current tracing span.

    :return: Number of entries returned by the query; ``0`` when the query
             returned nothing.
    :rtype: int
    """
    current_span = extract_span_from_kwargs(**kwargs)

    session = db.session

    result = query_sli(indicator.name, indicator.source, start, end)

    if not result:
        # Nothing to insert. Returning here also covers a ``None`` result,
        # for which ``len()`` would raise a TypeError.
        return 0

    insert_span = opentracing.tracer.start_span(
        operation_name='insert_indicator_values', child_of=current_span)
    (insert_span
        .set_tag('indicator', indicator.name)
        .set_tag('indicator_id', indicator.id))

    insert_span.log_kv({'result_count': len(result)})

    with insert_span:
        for minute, val in result.items():
            # Keep non-zero values at least MIN_VAL away from zero
            # (in either direction); an exact zero is stored unchanged.
            if val > 0:
                val = max(val, MIN_VAL)
            elif val < 0:
                val = min(val, MIN_VAL * -1)

            iv = IndicatorValue(timestamp=minute,
                                value=val,
                                indicator_id=indicator.id)
            insert_indicator_value(session, iv)

    session.commit()

    return len(result)
Exemple #2
0
    def update_dashboard(self, dashboard: dict, **kwargs) -> dict:
        """
        Create a new dashboard or update an existing one.

        A dashboard carrying a truthy ``id`` is posted to its own endpoint (update); any other dashboard is
        posted to the collection endpoint (create).

        :param dashboard: ZMON dashboard dict.
        :type dashboard: dict

        :return: Dashboard dict.
        :rtype: dict
        """
        span = extract_span_from_kwargs(**kwargs)
        dashboard_id = dashboard.get('id')

        if dashboard_id:
            logger.debug('Updating dashboard with ID: {} ...'.format(dashboard_id))
            span.set_tag('dashboard_id', dashboard_id)
            url = self.endpoint(DASHBOARD, dashboard_id)
        else:
            # No usable ID: treat it as a new dashboard.
            logger.debug('Adding new dashboard ...')
            url = self.endpoint(DASHBOARD)

        response = self.session.post(url, json=dashboard, timeout=self._timeout)
        response.raise_for_status()

        return self.json(response)
Exemple #3
0
    def create_alert_definition(self, alert_definition: dict, **kwargs) -> dict:
        """
        Create new alert definition.

        Attributes ``last_modified_by`` and ``check_definition_id`` are required.
        If ``status`` is not set, then it will be set to ``ACTIVE``.

        :param alert_definition: ZMON alert definition dict.
        :type alert_definition: dict

        :return: Alert definition dict.
        :rtype: dict

        :raises ZmonArgumentError: If ``last_modified_by`` or ``check_definition_id`` is missing.
        """
        current_span = extract_span_from_kwargs(**kwargs)
        if 'last_modified_by' not in alert_definition:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': 'Alert definition must have "last_modified_by"'})
            raise ZmonArgumentError('Alert definition must have "last_modified_by"')

        if 'status' not in alert_definition:
            alert_definition['status'] = 'ACTIVE'

        if 'check_definition_id' not in alert_definition:
            current_span.set_tag('error', True)
            # Record the attribute that is actually missing on the span.
            current_span.log_kv({'exception': 'Alert definition must have "check_definition_id"'})
            # The raised message text is left untouched for callers that may match on it.
            raise ZmonArgumentError('Alert defintion must have "check_definition_id"')
        current_span.set_tag('check_id', alert_definition['check_definition_id'])

        resp = self.session.post(self.endpoint(ALERT_DEF), json=alert_definition, timeout=self._timeout)

        return self.json(resp)
def get_all_stack_names(cf, **kwargs):
    """Collect the names of all stacks listed by the ``list_stacks`` paginator.

    Pages are requested with ``STACK_STATUS_FILTER``. Failures are recorded on
    the current span and logged instead of being raised; whatever names were
    collected before the failure are still returned.

    :param cf: Client object providing ``get_paginator``.
    :return: List of stack names.
    """
    span = extract_span_from_kwargs(**kwargs)
    stack_names = []
    paginator = cf.get_paginator('list_stacks')

    try:
        pages = call_and_retry(
            lambda: paginator.paginate(StackStatusFilter=STACK_STATUS_FILTER))
        for page in pages:
            for summary in page['StackSummaries']:
                stack_names.append(summary['StackName'])
        span.log_kv({"num_stacks": len(stack_names)})
    except Exception as exc:
        access_denied = (isinstance(exc, ClientError)
                         and exc.response['Error']['Code'] == 'AccessDenied')
        if not access_denied:
            span.set_tag('error', True)
            span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Failed to retrieve stack names')
        else:
            # Missing permission is reported as a warning, not as an error.
            msg = 'Access to AWS CloudFormation denied. You may need the cloudformation:ListStacks permission'
            logger.warning(msg)
            span.log_kv({'message': msg})
            span.set_tag('access_denied', True)

    return stack_names
Exemple #5
0
    def get_alert_data(self, alert_id: int, **kwargs) -> dict:
        """
        Fetch the data of an alert for all entities.

        The response is a ``dict`` keyed by entity ID, with the check return value as the value.

        :param alert_id: ZMON alert ID.
        :type alert_id: int

        :return: Alert data dict.
        :rtype: dict

        Example:

        .. code-block:: json

            {
                "entity-id-1": 122,
                "entity-id-2": 0,
                "entity-id-3": 100
            }
        """
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('alert_id', str(alert_id))

        url = self.endpoint(ALERT_DATA, alert_id, 'all-entities')
        response = self.session.get(url, timeout=self._timeout)

        return self.json(response)
Exemple #6
0
def get_cluster_ingresses(kube_client, cluster_id, alias, environment, region, infrastructure_account,
                          namespace='default', **kwargs) -> list:
    """Build one ZMON entity dict per ingress returned for ``namespace``.

    Each entity carries the cluster/account attributes passed in, the ingress
    name, namespace and rules, plus whatever ``entity_labels`` derives from the
    ingress object's labels.

    :return: List of ingress entity dicts.
    """
    span = extract_span_from_kwargs(**kwargs)

    result = []

    for ingress in get_all(kube_client, kube_client.get_ingresses, namespace, span=span):
        manifest = ingress.obj

        entity = {
            'id': 'ingress-{}-{}[{}]'.format(ingress.name, ingress.namespace, cluster_id),
            'type': INGRESS_TYPE,
            'kube_cluster': cluster_id,
            'alias': alias,
            'environment': environment,
            'created_by': AGENT_TYPE,
            'infrastructure_account': infrastructure_account,
            'region': region,

            'ingress_name': ingress.name,
            'ingress_namespace': ingress.namespace,

            # An ingress without rules yields an empty list.
            'ingress_rules': manifest['spec'].get('rules', []),
        }
        entity.update(entity_labels(manifest, 'labels'))

        result.append(entity)

    return result
Exemple #7
0
def get_cluster_namespaces(
        kube_client, cluster_id, alias, environment, region, infrastructure_account, namespace=None, **kwargs) -> list:
    """Build one ZMON entity dict per namespace of the cluster.

    When ``namespace`` is given, only the namespace with that name is kept.
    Each entity is extended with what ``entity_labels`` derives from the
    namespace object's labels and annotations.

    :return: List of namespace entity dicts.
    """
    # The span is extracted as before but not used in this function.
    _ = extract_span_from_kwargs(**kwargs)  # noqa

    result = []

    for ns in kube_client.get_namespaces():
        manifest = ns.obj
        if namespace and namespace != ns.name:
            continue

        entity = {
            'id': 'namespace-{}[{}]'.format(ns.name, cluster_id),
            'type': NAMESPACE_TYPE,
            'kube_cluster': cluster_id,
            'alias': alias,
            'environment': environment,
            'created_by': AGENT_TYPE,
            'infrastructure_account': infrastructure_account,
            'region': region,

            'namespace_name': ns.name,
        }
        entity.update(entity_labels(manifest, 'labels', 'annotations'))

        result.append(entity)

    return result
Exemple #8
0
    def update_grafana_dashboard(self, grafana_dashboard: dict, **kwargs) -> dict:
        """
        Update existing Grafana dashboard.

        Attributes ``uid`` and ``title`` are required inside ``grafana_dashboard['dashboard']``.

        :param grafana_dashboard: Grafana dashboard dict.
        :type grafana_dashboard: dict

        :return: Grafana dashboard dict.
        :rtype: dict

        :raises ZmonArgumentError: If ``uid`` or ``title`` is missing.
        """
        span = extract_span_from_kwargs(**kwargs)
        dashboard = grafana_dashboard['dashboard']

        if 'uid' not in dashboard:
            span.set_tag('error', True)
            span.log_kv({'exception': 'Grafana dashboard must have "uid". Use Grafana6 dashboard format.'})
            raise ZmonArgumentError('Grafana dashboard must have "uid". Hint: Use Grafana6 dashboard format.')

        if 'title' not in dashboard:
            span.set_tag('error', True)
            span.log_kv({'exception': 'Grafana dashboard must have "title"'})
            raise ZmonArgumentError('Grafana dashboard must have "title"')

        span.set_tag('grafana_dashboard_uid', dashboard['uid'])

        dashboard_id = dashboard.get('id')
        if dashboard_id is not None:
            span.set_tag('grafana_dashboard_id', dashboard_id)

        # NOTE(review): the payload is serialized with json.dumps and then passed via ``json=``, which
        # presumably encodes it a second time - confirm that the endpoint expects a JSON string body.
        payload = json.dumps(grafana_dashboard)
        response = self.session.post(self.endpoint(GRAFANA), json=payload, timeout=self._timeout)

        return self.json(response)
Exemple #9
0
def remove_missing_entities(existing_ids,
                            current_ids,
                            zmon_client,
                            dry_run=False,
                            **kwargs):
    """Delete entities that exist in ZMON but are no longer current.

    The IDs to remove are those in ``existing_ids`` that are not in
    ``current_ids``. With ``dry_run`` set, nothing is deleted and only the
    list of IDs is computed.

    :param existing_ids: IDs currently known to ZMON.
    :param current_ids: IDs that should remain.
    :param zmon_client: Client providing ``delete_entity(entity_id)``.
    :param dry_run: When true, skip the actual deletion.

    :return: Tuple ``(to_be_removed_ids, error_count)``; ``error_count`` is the
             number of entities whose deletion failed, either by a falsy
             ``delete_entity`` result or by an exception.
    """
    current_span = extract_span_from_kwargs(**kwargs)

    to_be_removed_ids = list(set(existing_ids) - set(current_ids))

    error_count = 0

    if not dry_run:
        logger.info('Removing {} entities from ZMON'.format(
            len(to_be_removed_ids)))
        for entity_id in to_be_removed_ids:
            logger.info('Removing entity with id: {}'.format(entity_id))
            try:
                deleted = zmon_client.delete_entity(entity_id)
                if not deleted:
                    current_span.set_tag('error', True)
                    logger.info('Failed to delete entity!')
                    error_count += 1
            except Exception:
                # A raised error is a failed deletion too: count and log it
                # so it is not silently lost, then continue with the rest.
                current_span.set_tag('error', True)
                current_span.log_kv({'exception': traceback.format_exc()})
                logger.exception(
                    'Exception while deleting entity: {}'.format(entity_id))
                error_count += 1

    return to_be_removed_ids, error_count
Exemple #10
0
    def add_entity(self, entity: dict, **kwargs) -> requests.Response:
        """
        Create or update an entity on ZMON.

        .. note::

            ZMON PUT entity API doesn't return JSON response.

        :param entity: Entity dict. Must contain ``id`` and ``type``.
        :type entity: dict

        :return: Response object.
        :rtype: :class:`requests.Response`

        :raises ZmonArgumentError: If ``id`` or ``type`` is missing, or the ID is not valid.
        """
        has_required_keys = 'id' in entity and 'type' in entity
        if not has_required_keys:
            raise ZmonArgumentError('Entity "id" and "type" are required.')

        ent_id = entity['id']
        if not self.is_valid_entity_id(ent_id):
            raise ZmonArgumentError('Invalid entity ID.')

        logger.debug('Adding new entity: {} ...'.format(ent_id))

        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('entity_id', ent_id)

        payload = json.dumps(entity, cls=JSONDateEncoder)
        url = self.endpoint(ENTITIES, trailing_slash=False)
        response = self.session.put(url, data=payload, timeout=self._timeout)

        response.raise_for_status()

        return response
Exemple #11
0
    def update_check_definition(self, check_definition, skip_validation=False, **kwargs) -> dict:
        """
        Update existing check definition.

        Attribute ``owning_team`` is required. If ``status`` is not set, then it will be set to ``ACTIVE``.

        :param check_definition: ZMON check definition dict.
        :type check_definition: dict

        :param skip_validation: Skip validation of the check command syntax.
        :type skip_validation: bool

        :return: Check definition dict.
        :rtype: dict

        :raises ZmonArgumentError: If ``owning_team`` is missing.
        """
        span = extract_span_from_kwargs(**kwargs)

        if 'owning_team' not in check_definition:
            span.set_tag('error', True)
            span.log_kv({'exception': 'Check definition must have "owning_team"'})
            raise ZmonArgumentError('Check definition must have "owning_team"')

        check_definition.setdefault('status', 'ACTIVE')

        if not skip_validation:
            command = check_definition['command']
            try:
                self.validate_check_command(command)
            except Exception:
                # Record the failure on the span, then let the caller handle it.
                span.set_tag('error', True)
                span.log_kv({'exception': traceback.format_exc()})
                raise

        response = self.session.post(self.endpoint(CHECK_DEF), json=check_definition, timeout=self._timeout)

        return self.json(response)
Exemple #12
0
    def get_entities(self, query=None, **kwargs) -> list:
        """
        Get ZMON entities, with optional filtering.

        :param query: Entity filtering query. Default is ``None``. Example query ``{'type': 'instance'}`` to return
                      all entities of type: ``instance``.
        :type query: dict

        :return: List of entities.
        :rtype: list
        """
        query_str = json.dumps(query) if query else ''
        logger.debug(
            'Retrieving entities with query: {} ...'.format(query_str))

        current_span = extract_span_from_kwargs(**kwargs)
        # log_kv takes a key/value mapping; the previous ``{'query', query_str}``
        # was a set literal and did not record the query under a key.
        current_span.log_kv({'query': query_str})

        # Only send the query parameter when a filter was actually given.
        params = {'query': query_str} if query else None

        resp = self.session.get(self.endpoint(ENTITIES),
                                params=params,
                                timeout=self._timeout)

        return self.json(resp)
Exemple #13
0
def add_new_entities(all_current_entities,
                     existing_entities,
                     zmon_client,
                     dry_run=False,
                     **kwargs):
    """Push new or updated entities to ZMON.

    An entity is selected when ``new_or_updated_entity`` says so, given the
    existing entities indexed by ``id``. With ``dry_run`` set, nothing is
    sent and only the selection is returned.

    :return: Tuple ``(new_entities, error_count)``; ``error_count`` is the
             number of entities that could not be added.
    """
    span = extract_span_from_kwargs(**kwargs)

    existing_by_id = {e['id']: e for e in existing_entities}
    new_entities = [
        candidate for candidate in all_current_entities
        if new_or_updated_entity(candidate, existing_by_id)
    ]

    error_count = 0

    if dry_run:
        return new_entities, error_count

    logger.info(
        'Found {} new or updated entities to be added in ZMON'.format(
            len(new_entities)))

    for entity in new_entities:
        logger.info('Adding new or updated {} entity with ID: {}'.format(
            entity['type'], entity['id']))
        try:
            response = zmon_client.add_entity(entity)
            response.raise_for_status()
        except Exception:
            # One failing entity must not stop the rest; record and count it.
            span.set_tag('error', True)
            logger.exception('Failed to add entity!')
            span.log_kv({
                'exception': traceback.format_exc(),
                "entity": entity
            })
            error_count += 1

    return new_entities, error_count
def list_postgres_databases(*args, **kwargs):
    """List database names of a PostgreSQL instance, excluding system databases.

    ``args`` and ``kwargs`` (after ``clean_opentracing_span`` has processed
    them) are passed to ``psycopg2.connect``; ``connect_timeout`` is always
    set to ``POSTGRESQL_CONNECT_TIMEOUT``.

    :return: List of database names; an empty list on any failure, which is
             recorded on the current span and logged.
    """
    # Extracted before the ``try`` so the error handler can always use it.
    current_span = extract_span_from_kwargs(**kwargs)
    conn = None
    try:
        query = """
            SELECT datname
              FROM pg_database
             WHERE datname NOT IN('postgres', 'template0', 'template1')
        """
        kwargs = clean_opentracing_span(**kwargs)

        current_span.set_tag(ot_tags.PEER_ADDRESS,
                             'psql://{}:{}'.format(kwargs.get('host'), kwargs.get('port')))
        current_span.set_tag(ot_tags.DATABASE_INSTANCE, kwargs.get('dbname'))
        current_span.set_tag(ot_tags.DATABASE_STATEMENT, query)

        kwargs.update({'connect_timeout': POSTGRESQL_CONNECT_TIMEOUT})
        conn = psycopg2.connect(*args, **kwargs)
        cur = conn.cursor()
        cur.execute(query)
        return [row[0] for row in cur.fetchall()]
    except Exception:
        current_span.set_tag('error', True)
        current_span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Failed to list DBs!')
        return []
    finally:
        # Release the connection on every path; a failing close must not
        # replace the result computed above.
        if conn is not None:
            try:
                conn.close()
            except Exception:
                logger.warning('Failed to close DB connection!')
Exemple #15
0
def list_postgres_databases(*args, **kwargs):
    """Return the names of all non-system databases of a PostgreSQL instance.

    Connection arguments are forwarded to ``psycopg2.connect`` after
    ``clean_opentracing_span`` has processed ``kwargs``; ``connect_timeout``
    is forced to ``POSTGRESQL_CONNECT_TIMEOUT``.

    :return: List of database names, or an empty list when anything fails
             (the failure is tagged on the current span and logged).
    """
    # Resolve the span first so that the ``except`` branch never hits an
    # unbound name.
    current_span = extract_span_from_kwargs(**kwargs)
    conn = None
    try:
        query = """
            SELECT datname
              FROM pg_database
             WHERE datname NOT IN('postgres', 'template0', 'template1')
        """
        kwargs = clean_opentracing_span(**kwargs)

        current_span.set_tag(
            ot_tags.PEER_ADDRESS,
            'psql://{}:{}'.format(kwargs.get('host'), kwargs.get('port')))
        current_span.set_tag(ot_tags.DATABASE_INSTANCE, kwargs.get('dbname'))
        current_span.set_tag(ot_tags.DATABASE_STATEMENT, query)

        kwargs.update({'connect_timeout': POSTGRESQL_CONNECT_TIMEOUT})
        conn = psycopg2.connect(*args, **kwargs)
        cur = conn.cursor()
        cur.execute(query)
        return [row[0] for row in cur.fetchall()]
    except Exception:
        current_span.set_tag('error', True)
        current_span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Failed to list DBs!')
        return []
    finally:
        # Always close the connection; swallow close errors so they cannot
        # override the value being returned.
        if conn is not None:
            try:
                conn.close()
            except Exception:
                logger.warning('Failed to close DB connection!')
Exemple #16
0
def get_auto_scaling_groups(region, acc, **kwargs):
    """Build one ``asg`` entity dict per auto scaling group in ``region``.

    Each entity holds the group's basic attributes, properties assigned by
    ``assign_properties_from_tags`` and ``add_traffic_tags_to_entity``, and an
    ``instances`` list with the ID and private IP of every ``InService``
    instance that has a private IP. A failure while describing instances is
    recorded on the span and logged; the group is still returned.

    :param region: AWS region name used for the boto3 clients.
    :param acc: Account identifier stored on each entity.
    :return: List of ASG entity dicts.
    """
    asg_entities = []

    autoscaling = boto3.client('autoscaling', region_name=region)
    ec2 = boto3.client('ec2', region_name=region)

    asg_paginator = autoscaling.get_paginator('describe_auto_scaling_groups')

    all_groups = call_and_retry(
        lambda: asg_paginator.paginate(PaginationConfig={'MaxItems': MAX_PAGE}).build_full_result()['AutoScalingGroups'])

    for group in all_groups:
        group_name = group['AutoScalingGroupName']
        asg = {
            'id': entity_id('asg-{}[{}:{}]'.format(group_name, acc, region)),
            'type': 'asg',
            'infrastructure_account': acc,
            'region': region,
            'created_by': 'agent',
            'name': group_name,
            'availability_zones': group['AvailabilityZones'],
            'desired_capacity': group['DesiredCapacity'],
            'max_size': group['MaxSize'],
            'min_size': group['MinSize'],
            'created_time': group['CreatedTime'].strftime('%Y-%m-%d %H:%M:%S.%f'),
        }

        assign_properties_from_tags(asg, group.get('Tags', []))

        add_traffic_tags_to_entity(asg)

        asg['instances'] = []
        instance_ids = [
            member['InstanceId'] for member in group['Instances']
            if member['LifecycleState'] == 'InService'
        ]

        # Only describe instances when there is something to filter for:
        # an empty filter would claim *every* instance in the account.
        if instance_ids:
            ec2_paginator = ec2.get_paginator('describe_instances')

            try:
                reservations = call_and_retry(
                    lambda: ec2_paginator.paginate(InstanceIds=instance_ids).build_full_result()['Reservations'])

                for reservation in reservations:
                    for instance in reservation['Instances']:
                        if 'PrivateIpAddress' not in instance:
                            continue
                        asg['instances'].append({
                            'aws_id': instance['InstanceId'],
                            'ip': instance['PrivateIpAddress'],
                        })
            except Exception:
                span = extract_span_from_kwargs(**kwargs)
                span.set_tag('error', True)
                span.log_kv({'exception': traceback.format_exc()})
                logger.exception('Failed in retrieving instances for ASG: {}'.format(asg['name']))

        asg_entities.append(asg)

    return asg_entities
Exemple #17
0
    def parent(**kwargs):
        """Check span presence against ``pass_span``, then call ``nested``.

        When ``pass_span`` is set, the extracted span must have the operation
        name ``'parent'``.
        """
        span_present = is_span_in_kwargs(**kwargs)
        assert span_present is pass_span

        if pass_span:
            span = extract_span_from_kwargs(**kwargs)
            assert span.operation_name == 'parent'

        nested()
Exemple #18
0
    def notify(cls, alert, *args, **kwargs):
        """
        Send a HipChat room notification for ``alert``.

        Options read from ``kwargs``: ``room`` (required for sending), ``token``, ``repeat``, ``notify``,
        ``message_format``, ``color``, ``message``, ``link``, ``link_text`` and ``zmon_host``. A failure while
        sending is recorded on the current span and logged; it is not raised.

        :param alert: Alert dict; ``alert_def`` and ``entity`` are read from it.

        :return: The ``repeat`` value from ``kwargs`` (default ``0``).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        url = cls._config.get('notifications.hipchat.url')
        token = kwargs.get('token', cls._config.get('notifications.hipchat.token'))
        repeat = kwargs.get('repeat', 0)
        notify = kwargs.get('notify', False)
        alert_def = alert['alert_def']
        message_format = kwargs.get('message_format', 'html')

        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        current_span.log_kv({'room': kwargs.get('room')})

        # An alert that is no longer active is always shown in green.
        color = 'green' if alert and not alert.get('is_alert') else kwargs.get('color', 'red')

        message_text = cls._get_subject(alert, custom_message=kwargs.get('message'))

        if kwargs.get('link', False):
            zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
            alert_id = alert['alert_def']['id']
            alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id)) if zmon_host else ''
            link_text = kwargs.get('link_text', 'go to alert')
            if message_format == 'html':
                message_text += ' -- <a href="{}" target="_blank">{}</a>'.format(alert_url, link_text)
            else:
                message_text += ' -- {} - {}'.format(link_text, alert_url)

        message = {
            'message': message_text,
            'color': color,
            'notify': notify,
            'message_format': message_format
        }

        try:
            # Built inside the ``try`` so a missing ``room`` is handled like any other send failure.
            room_url = '{}/v2/room/{}/notification'.format(url, urllib.quote(kwargs['room']))
            # The auth token is a secret: log a placeholder instead of its value.
            logger.info(
                'Sending to: ' + '{}?auth_token={}'.format(room_url, '***') + ' ' + json.dumps(message))
            r = requests.post(
                room_url,
                json=message, params={'auth_token': token}, headers={'Content-type': 'application/json'})
            r.raise_for_status()
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Hipchat write failed!')

        return repeat
Exemple #19
0
    def notify(cls, alert, *args, **kwargs):
        """
        Send an HTML HipChat room notification for ``alert``.

        Options read from ``kwargs``: ``room`` (required for sending),
        ``token``, ``repeat``, ``notify``, ``color``, ``message``, ``link``,
        ``link_text`` and ``zmon_host``. A failure while sending is recorded
        on the current span and logged; it is not raised.

        :param alert: Alert dict; ``alert_def`` and ``entity`` are read from it.

        :return: The ``repeat`` value from ``kwargs`` (default ``0``).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        url = cls._config.get('notifications.hipchat.url')
        token = kwargs.get('token',
                           cls._config.get('notifications.hipchat.token'))
        repeat = kwargs.get('repeat', 0)
        notify = kwargs.get('notify', False)
        alert_def = alert['alert_def']

        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        current_span.log_kv({'room': kwargs.get('room')})

        # An alert that is no longer active is always shown in green.
        color = 'green' if alert and not alert.get('is_alert') else kwargs.get(
            'color', 'red')

        message_text = cls._get_subject(alert,
                                        custom_message=kwargs.get('message'))

        if kwargs.get('link', False):
            zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
            alert_id = alert['alert_def']['id']
            alert_url = urlparse.urljoin(
                zmon_host,
                '/#/alert-details/{}'.format(alert_id)) if zmon_host else ''
            link_text = kwargs.get('link_text', 'go to alert')
            message_text += ' -- <a href="{}" target="_blank">{}</a>'.format(
                alert_url, link_text)

        message = {'message': message_text, 'color': color, 'notify': notify}

        try:
            # Built inside the ``try`` so a missing ``room`` is handled like
            # any other send failure.
            room_url = '{}/v2/room/{}/notification'.format(
                url, urllib.quote(kwargs['room']))
            # The auth token is a secret: log a placeholder, not its value.
            logger.info('Sending to: ' +
                        '{}?auth_token={}'.format(room_url, '***') + ' ' +
                        json.dumps(message))
            r = requests.post(room_url,
                              json=message,
                              params={'auth_token': token},
                              headers={'Content-type': 'application/json'})
            r.raise_for_status()
        except Exception as e:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': str(e)})
            logger.exception('Hipchat write failed!')

        return repeat
Exemple #20
0
    def notify(cls, alert, *args, **kwargs):
        """
        Post an alert notification to the notification service's Twilio endpoint.

        Options read from ``kwargs``: ``repeat``, ``oauth2``, ``key``, ``message``, ``team``, ``numbers`` and
        ``voice``. When no service URL is configured, nothing is sent. A failed request is recorded on the
        current span and logged; it is not raised.

        :param alert: Alert dict; ``alert_def`` and ``entity`` are read from it.

        :return: The ``repeat`` value from ``kwargs`` (default ``0``).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        repeat = kwargs.get('repeat', 0)
        oauth2 = kwargs.get('oauth2', True)
        headers = {'Content-type': 'application/json'}
        timeout = 5

        alert_def = alert['alert_def']
        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        url = cls._config.get('notifications.service.url', None)
        if not url:
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'No notification service url set!'})
            logger.error('No notification service url set')
            return repeat

        url = url + '/api/v1/twilio'

        # Authenticate either with the ``uid`` token or with a configured/passed service key.
        if oauth2:
            headers.update({'Authorization': 'Bearer {}'.format(tokens.get('uid'))})
        else:
            key = kwargs.get('key', cls._config.get('notifications.service.key'))
            headers.update({'Authorization': 'Bearer {}'.format(key)})

        headers['User-Agent'] = get_user_agent()

        data = {
            'message': kwargs.get('message', cls._get_subject(alert)),
            'escalation_team': kwargs.get('team', alert['alert_def'].get('team', '')),
            'numbers': kwargs.get('numbers', []),
            'voice': kwargs.get('voice', 'woman'),
            'alert_id': alert['alert_def']['id'],
            'entity_id': alert['entity']['id'],
            'event_type': 'ALERT_ENDED' if alert and not alert.get('is_alert') else 'ALERT_START',
            'alert_changed': alert.get('alert_changed', False),
        }

        try:
            logger.info('Sending HTTP POST request to {}'.format(url))
            r = requests.post(url, data=json.dumps(data, cls=JsonDataEncoder), headers=headers, timeout=timeout)

            r.raise_for_status()
        except Exception as e:
            # Mark the span as failed so the error is visible in tracing, not only in the log.
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': str(e)})
            logger.exception('Twilio Request failed!')

        return repeat
Exemple #21
0
    def save_object(self, obj: Target, **kwargs) -> Target:
        """Add ``obj`` to the DB session, commit, and return it.

        The object's ``objective_id`` and ``indicator_id`` are logged on the
        current span before saving.
        """
        span = extract_span_from_kwargs(**kwargs)
        span.log_kv({'objective_id': obj.objective_id})
        span.log_kv({'indicator_id': obj.indicator_id})

        session = db.session
        session.add(obj)
        session.commit()

        return obj
Exemple #22
0
    def notify(cls, alert, *args, **kwargs):
        """
        Send an SMS notification for ``alert`` to every resolved phone number.

        Recipients are resolved from ``args`` via ``BaseNotification.resolve_group``.
        The message is the alert subject (or the custom ``message`` from
        ``kwargs``) cut to the configured maximum length. Sending is skipped
        when ``notifications.sms.on`` is configured as false. A failure is
        recorded on the current span and logged; it is not raised.

        :param alert: Alert dict; ``alert_def`` and ``entity`` are read from it.

        :return: The ``repeat`` value from ``kwargs`` (default ``0``).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        alert_def = alert['alert_def']
        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        provider_url = cls._config.get('notifications.sms.provider_url',
                                       SMS_PROVIDER_URL)
        phone_numbers = BaseNotification.resolve_group(args, phone=True)
        repeat = kwargs.get('repeat', 0)

        maxlen = cls._config.get('notifications.sms.maxlength', SMS_MAXLENGTH)
        message = cls._get_subject(
            alert, custom_message=kwargs.get('message'))[:maxlen]

        request_params = {
            'to': '',
            'key': cls._config['notifications.sms.apikey'],
            'from': cls._config.get('notifications.sms.sender', SMS_SENDER),
            'route': cls._config.get('notifications.sms.route', SMS_ROUTE),
            'message': message,
            'cost': 1,
            'message_id': 1,
        }

        try:
            if cls._config.get('notifications.sms.on', True):
                for phone in phone_numbers:
                    request_params['to'] = phone
                    # NOTE(review): verify=False disables TLS certificate
                    # verification for the provider request - confirm this
                    # is intended.
                    r = requests.get(provider_url,
                                     params=request_params,
                                     verify=False)
                    # Mask the API key before the URL is written to the log.
                    url_secured = r.url.replace(
                        request_params['key'],
                        '*' * len(request_params['key']))
                    logger.info(
                        'SMS sent: request to %s --> status: %s, response headers: %s, response body: %s',
                        url_secured, r.status_code, r.headers, r.text)
                    r.raise_for_status()
        except Exception as e:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': str(e)})
            logger.exception(
                'Failed to send sms for alert %s with id %s to: %s',
                alert_def['name'], alert_def['id'], list(phone_numbers))

        # Returned after the handler rather than from a ``finally`` clause, so
        # KeyboardInterrupt/SystemExit are no longer silently discarded.
        return repeat
Exemple #23
0
def get_account_id(region, **kwargs):
    """Return the fifth ``:``-separated field of the first IAM role's ARN.

    :param region: Region name used for the IAM client.
    :return: The extracted field, or ``None`` when anything fails (the failure
             is recorded on the current span).
    """
    try:
        roles = boto3.client('iam', region_name=region).list_roles()['Roles']
        first_role_arn = roles[0]['Arn']
        return first_role_arn.split(':')[4]
    except Exception:
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        return None
Exemple #24
0
def get_account_id(region, **kwargs):
    """Derive an ID from the ARN of the first role listed by IAM.

    The ARN is split on ``:`` and the element at index 4 is returned.

    :param region: Region name used for the IAM client.
    :return: The ARN element, or ``None`` on any failure, which is tagged and
             logged on the current span.
    """
    try:
        client = boto3.client('iam', region_name=region)
        arn_parts = client.list_roles()['Roles'][0]['Arn'].split(':')
        return arn_parts[4]
    except Exception:
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        return None
Exemple #25
0
def update_local_entity(zmon_client, entity, **kwargs):
    """Add the local entity through ``zmon_client``; failures are not raised.

    The current span is tagged with the entity type ``local`` and the entity
    ID. An exception from ``add_entity`` is recorded on the span and logged.
    """
    span = extract_span_from_kwargs(**kwargs)
    span.set_tag('entity_type', 'local')
    span.set_tag('entity_id', entity['id'])

    try:
        zmon_client.add_entity(entity)
    except Exception:
        failure = traceback.format_exc()
        span.set_tag('error', True)
        span.log_kv({'exception': failure})
        logger.exception('Failed to add Local entity: {}'.format(entity))
Exemple #26
0
def get_account_alias(region, **kwargs):
    """Return the first account alias listed by IAM.

    :param region: Region name used for the IAM client.
    :return: The first alias, or ``None`` when anything fails (including an
             empty alias list); the failure is recorded on the current span.
    """
    try:
        client = boto3.client('iam', region_name=region)
        aliases = client.list_account_aliases()['AccountAliases']
        return aliases[0]
    except Exception:
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        return None
Exemple #27
0
def update_local_entity(zmon_client, entity, **kwargs):
    """Send the local entity to ZMON on a best-effort basis.

    Tags the current span with ``entity_type`` ``local`` and the entity's ID,
    then calls ``zmon_client.add_entity``. Any exception is tagged on the
    span and logged instead of being propagated.
    """
    span = extract_span_from_kwargs(**kwargs)
    span.set_tag('entity_type', 'local')
    span.set_tag('entity_id', entity['id'])

    try:
        zmon_client.add_entity(entity)
    except Exception:
        trace_text = traceback.format_exc()
        span.set_tag('error', True)
        span.log_kv({'exception': trace_text})
        logger.exception('Failed to add Local entity: {}'.format(entity))
Exemple #28
0
def get_account_alias(region, **kwargs):
    """Look up the account's first alias through IAM.

    :param region: Region name used for the IAM client.
    :return: First entry of ``AccountAliases``, or ``None`` on any failure,
             which is tagged and logged on the current span.
    """
    try:
        response = boto3.client('iam', region_name=region).list_account_aliases()
        first_alias = response['AccountAliases'][0]
        return first_alias
    except Exception:
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        return None
Exemple #29
0
def get_certificates(region, acc, **kwargs):
    """
    Build ``certificate`` entities from IAM server certificates and ACM certificates.

    Both listings are fetched first; IAM certificates are converted before ACM ones. If anything raises while
    listing or converting, the error is recorded on the span from ``kwargs``, logged, and the entities
    collected so far are returned.

    :param region: Region name used for both boto3 clients and stored on each entity.
    :param acc: Infrastructure account stored on each entity and embedded in its ID.
    :return: List of entity dicts (possibly partial or empty).
    """
    iam = boto3.client('iam', region_name=region)
    acm = boto3.client('acm', region_name=region)

    collected = []

    try:
        iam_listing = iam.list_server_certificates()
        iam_certs = iam_listing['ServerCertificateMetadataList']

        acm_listing = acm.list_certificates()
        acm_summaries = acm_listing['CertificateSummaryList']

        for meta in iam_certs:
            cert_name = meta['ServerCertificateName']
            collected.append({
                'id': entity_id('cert-iam-{}[{}:{}]'.format(cert_name, acc, region)),
                'type': 'certificate',
                'infrastructure_account': acc,
                'region': region,
                'created_by': 'agent',
                'certificate_type': 'iam',
                'name': cert_name,
                'arn': meta['Arn'],
                'status': 'ISSUED',
                'expiration': meta['Expiration'].isoformat(),
                # The IAM listing carries no usage information, so these are always reported as in use.
                'in_use': True,
            })

        for summary in acm_summaries:
            arn = summary['CertificateArn']
            details = acm.describe_certificate(CertificateArn=arn)['Certificate']

            # Last path segment of the ARN.
            short_id = arn.split('/')[-1]
            domain = details['DomainName']

            acm_entity = {
                'id': entity_id('cert-acm-{}-{}[{}:{}]'.format(short_id, domain, acc, region)),
                'type': 'certificate',
                'infrastructure_account': acc,
                'region': region,
                'created_by': 'agent',
                'certificate_type': 'acm',
                'name': domain,
                'arn': details['CertificateArn'],
                'status': details['Status'],
                'expiration': details['NotAfter'].isoformat() if 'NotAfter' in details else '',
                'in_use': len(details['InUseBy']) > 0,
            }

            collected.append(acm_entity)
    except Exception:
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Failed while retrieving IAM/ACM certificates, IAM role has no access?')

    return collected
Exemple #30
0
def get_rds_instances(region, acc, existing_entities, **kwargs):
    """
    Return ``database`` entities for the RDS instances in ``region``.

    RDS is only queried when the current local minute is a multiple of 15; at any other time the RDS entities
    already present in ``existing_entities`` (type ``database``, ID starting with ``rds-``) are returned as is.
    Errors during discovery are recorded on the span from ``kwargs`` and logged; whatever was collected before
    the failure is returned.

    :param region: Region name for the boto3 RDS client; stored on each entity.
    :param acc: Infrastructure account; stored on each entity and embedded in its ID.
    :param existing_entities: Iterable of entity dicts with at least ``type`` and ``id``.
    :return: List of entity dicts.
    """
    cached = [e for e in existing_entities if e['type'] == 'database' and e['id'].startswith('rds-')]

    if datetime.now().minute % 15 != 0:
        return cached

    discovered = []

    try:
        client = boto3.client('rds', region_name=region)
        pager = client.get_paginator('describe_db_instances')

        response = call_and_retry(
            lambda: pager.paginate(PaginationConfig={'MaxItems': MAX_PAGE}).build_full_result())

        for inst in response['DBInstances']:
            identifier = inst['DBInstanceIdentifier']

            record = {
                'id': entity_id('rds-{}[{}]'.format(identifier, acc)),
                'created_by': 'agent',
                'infrastructure_account': '{}'.format(acc),
                'region': region,
                'type': 'database',
                'engine': inst['Engine'],
                'port': inst['Endpoint']['Port'],
                'host': inst['Endpoint']['Address'],
                'name': identifier,
                'instance_type': inst.get('DBInstanceClass', ''),
                'storage_type': inst.get('StorageType', ''),
                'storage_size': inst.get('AllocatedStorage', ''),
            }

            if 'EngineVersion' in inst:
                record['version'] = inst['EngineVersion']

            # The shard is keyed by the database name when present, otherwise by the instance identifier.
            shard = inst.get('DBName') or identifier
            record['shards'] = {shard: '{}:{}/{}'.format(record['host'], record['port'], shard)}

            discovered.append(record)

    except Exception:
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Failed to get RDS instance')

    return discovered
Exemple #31
0
def add_entity(zmon_client, entity, **kwargs):
    """
    Add ``entity`` to ZMON and report the outcome as an error count.

    The span from ``kwargs`` is tagged with the entity type and ID. Exceptions are recorded on the span and
    logged rather than raised.

    :param zmon_client: Client object exposing ``add_entity``.
    :param entity: Entity dict; must contain ``type`` and ``id``.
    :return: ``0`` when the entity was added, ``1`` when adding raised.
    """
    span = extract_span_from_kwargs(**kwargs)
    span.set_tag('entity_type', entity['type'])
    span.set_tag('entity_id', entity['id'])

    try:
        logger.info('Adding new {} entity with ID: {}'.format(entity['type'], entity['id']))
        zmon_client.add_entity(entity)
    except Exception:
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Failed to add entity: {}'.format(entity))
        return 1
    else:
        return 0
Exemple #32
0
    def notify(cls, alert, *args, **kwargs):
        """
        Publish a push notification for ``alert``.

        The target URL and pre-shared key come from ``kwargs`` (``url``, ``key``) or from the
        ``notifications.push.*`` configuration. Without a URL nothing is sent and ``0`` is returned. Request
        failures are recorded on the span from ``kwargs`` and otherwise ignored.

        :param alert: Alert dict; must contain ``alert_def`` (with ``id`` and ``priority``) and ``entity``
            (with ``id``).
        :return: ``0`` when no URL is available, otherwise the ``repeat`` kwarg (default ``0``).
        """
        span = extract_span_from_kwargs(**kwargs)

        definition = alert['alert_def']
        span.set_tag('alert_id', definition['id'])

        target = alert.get('entity')
        changed = alert.get('alert_changed', False)
        firing = alert.get('is_alert', False)

        span.set_tag('entity', target['id'])
        span.set_tag('alert_changed', bool(changed))
        span.set_tag('is_alert', firing)

        base_url = kwargs.get('url', cls._config.get('notifications.push.url'))
        key = kwargs.get('key', cls._config.get('notifications.push.key'))

        # Nothing to do without a configured endpoint.
        if not base_url:
            return 0

        repeat = kwargs.get('repeat', 0)

        if alert and not alert.get('is_alert'):
            icon = 'clean.png'
        else:
            icon = 'warning.png'

        notification = {
            "icon": icon,
            "title": kwargs.get("message", cls._get_expanded_alert_name(alert)),
            "body": kwargs.get("body", formatEntity(alert["entity"]["id"])),
            "alert_changed": alert.get('alert_changed', False),
            "click_action": kwargs.get("click_action", "/#/alert-details/{}".format(alert["alert_def"]["id"])),
            "collapse_key": kwargs.get("collapse_key",
                                       "{}:{}".format(alert['alert_def']['id'], alert['entity']['id']))
        }

        payload = {
            "notification": notification,
            "alert_id": alert['alert_def']['id'],
            "entity_id": alert['entity']['id'],
            "team": kwargs.get('team', alert['alert_def'].get('team', '')),
            "priority": alert["alert_def"]["priority"]
        }

        publish_url = base_url + '/api/v1/publish'

        try:
            # Header construction stays inside the try: a missing key makes the concatenation raise.
            request_headers = {"Authorization": "PreShared " + key, 'Content-Type': 'application/json'}
            response = requests.post(publish_url, headers=request_headers, data=json.dumps(payload))
            response.raise_for_status()
        except Exception:
            span.set_tag('error', True)
            span.log_kv({'exception': traceback.format_exc()})

        return repeat
Exemple #33
0
def get_dynamodb_tables(region, acc, **kwargs):
    """
    Return ``dynamodb`` entities for the tables in ``region`` whose status is ``ACTIVE`` or ``UPDATING``.

    All failures are caught because the original agent policy does not allow scanning DynamoDB: a fixed
    message is written to the span from ``kwargs`` and logged, and the tables collected so far are returned.

    :param region: Region name for the boto3 DynamoDB client; stored on each entity.
    :param acc: Infrastructure account; stored on each entity and embedded in its ID.
    :return: List of entity dicts (possibly partial or empty).
    """
    found = []

    try:
        client = boto3.client('dynamodb', region_name=region)
        pager = client.get_paginator('list_tables')

        table_names = call_and_retry(
            lambda: pager.paginate(PaginationConfig={'MaxItems': MAX_PAGE}).build_full_result()['TableNames'])

        for table_name in table_names:
            description = call_and_retry(client.describe_table, TableName=table_name)['Table']

            if description['TableStatus'] in ('ACTIVE', 'UPDATING'):
                found.append({
                    'id': entity_id('dynamodb-{}[{}:{}]'.format(description['TableName'], acc, region)),
                    'region': region,
                    'created_by': 'agent',
                    'infrastructure_account': '{}'.format(acc),
                    'type': 'dynamodb',
                    'name': '{}'.format(description['TableName']),
                    'arn': '{}'.format(description['TableArn'])
                })
    except Exception:
        failure = 'Got exception while listing dynamodb tables, IAM role has no access?'
        span = extract_span_from_kwargs(**kwargs)
        span.log_kv({'exception': failure})
        logger.exception(failure)

    return found
def get_elastigroup_resources(cf, stack_name, **kwargs):
    """
    Extract the Elastigroups defined in a CloudFormation stack.

    Stack resources of type ``ELASTIGROUP_RESOURCE_TYPE`` are looked up, and for each of them the Spotinst
    access token and account ID are read from the resource's ``Properties`` in the stack template.

    The template is fetched at most once per call, and only when the stack contains at least one Elastigroup
    resource.

    :param cf: boto3 CloudFormation client.
    :param stack_name: Name of the stack to inspect; also set as ``stack_name`` tag on the span.
    :return: List of ``Elastigroup`` objects. On failure the error is recorded on the span from ``kwargs``
        (``access_denied`` tag for an ``AccessDenied`` client error, ``error`` tag otherwise) and the groups
        collected so far are returned.
    """
    groups = []
    current_span = extract_span_from_kwargs(**kwargs)
    current_span.set_tag('stack_name', stack_name)
    paginator = cf.get_paginator('list_stack_resources')
    try:
        stack_resources = call_and_retry(lambda: paginator.paginate(
            PaginationConfig={
                'MaxItems': MAX_PAGE
            }, StackName=stack_name).build_full_result()[
                'StackResourceSummaries'])

        elastigroups = [
            resource for resource in stack_resources
            if resource['ResourceType'] == ELASTIGROUP_RESOURCE_TYPE
        ]

        if elastigroups:
            # One template download serves every Elastigroup in the stack. A separate name is used so the
            # stack resource listing above is not shadowed.
            template_resources = cf.get_template(
                StackName=stack_name)['TemplateBody']['Resources']

            for elastigroup in elastigroups:
                group_id = elastigroup["PhysicalResourceId"]
                group_name = elastigroup["LogicalResourceId"]
                properties = template_resources[group_name]['Properties']
                spotinst_token = properties['accessToken']
                spotinst_account_id = properties['accountId']
                groups.append(
                    Elastigroup(group_id, group_name, spotinst_account_id,
                                spotinst_token))
    except Exception as e:
        # Read the error code defensively so that classifying the failure cannot raise itself.
        error_code = None
        if isinstance(e, ClientError):
            error_code = e.response.get('Error', {}).get('Code')

        if error_code == 'AccessDenied':
            msg = 'Access to AWS API denied. You may need the cloudformation:ListStackResources and ' \
                          'cloudformation:GetTemplate permissions'
            logger.warning(msg)
            current_span.log_kv({'message': msg})
            current_span.set_tag('access_denied', True)
        else:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception(
                'Failed to retrieve Elastigroup resources from Stack "{}"'.
                format(stack_name))

    return groups
Exemple #35
0
def get_instance_events(aws_client, instance, **kwargs):
    """
    Return the events of the first status entry reported for ``instance``.

    :param aws_client: Client exposing ``describe_instance_status``.
    :param instance: Instance dict; must contain ``InstanceId``.
    :return: The ``Events`` value of the first ``InstanceStatuses`` item, or ``[]`` when that item has no
        ``Events`` key or the lookup raised (the failure is recorded on the span from ``kwargs`` and logged).
    """
    try:
        response = call_and_retry(aws_client.describe_instance_status,
                                  InstanceIds=[instance['InstanceId']])

        first_status = response['InstanceStatuses'][0]
        if 'Events' in first_status:
            return first_status['Events']
    except Exception:
        span = extract_span_from_kwargs(**kwargs)
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Failed to retrieve instance events for instance: {}'.format(instance['InstanceId']))

    return []
Exemple #36
0
    def create_downtime(self, downtime: dict, **kwargs) -> dict:
        """
        Create a downtime for specific entities.

        Attributes ``entities`` list, ``start_time`` and ``end_time`` timestamps are required.

        :param downtime: Downtime dict.
        :type downtime: dict

        :return: Downtime dict.
        :rtype: dict

        :raises ZmonArgumentError: If ``entities`` is missing/empty, or ``start_time`` / ``end_time`` is
            missing or falsy. The failure is also recorded on the span from ``kwargs``.

        Example downtime:

        .. code-block:: json

            {
                "entities": ["entity-id-1", "entity-id-2"],
                "comment": "Planned maintenance",
                "start_time": 1473337437.312921,
                "end_time": 1473341037.312921
            }
        """
        span = extract_span_from_kwargs(**kwargs)

        # Validate in the same order as the error messages: entities first, then the time window.
        problem = None
        if not downtime.get('entities'):
            problem = 'At least one entity ID should be specified'
        elif not (downtime.get('start_time') and downtime.get('end_time')):
            problem = 'Downtime must specify "start_time" and "end_time"'

        if problem is not None:
            span.set_tag('error', True)
            span.log_kv({'exception': problem})
            raise ZmonArgumentError(problem)

        span.set_tag('entity_ids', str(downtime.get('entities')))
        # FIXME - should start_time / end_time be tagged on the span as well?

        response = self.session.post(self.endpoint(DOWNTIME), json=downtime, timeout=self._timeout)

        return self.json(response)
Exemple #37
0
def add_entity(zmon_client, entity, **kwargs):
    """
    Register ``entity`` in ZMON, tracing the attempt.

    Tags the span from ``kwargs`` with ``entity_type`` and ``entity_id``. A failing ``add_entity`` call is
    recorded on the span, logged with traceback, and reported through the return value instead of raising.

    :param zmon_client: Client object exposing ``add_entity``.
    :param entity: Entity dict; must contain ``type`` and ``id``.
    :return: ``0`` on success, ``1`` on failure.
    """
    span = extract_span_from_kwargs(**kwargs)
    span.set_tag('entity_type', entity['type'])
    span.set_tag('entity_id', entity['id'])

    failures = 0
    try:
        logger.info('Adding new {} entity with ID: {}'.format(entity['type'], entity['id']))
        zmon_client.add_entity(entity)
    except Exception:
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Failed to add entity: {}'.format(entity))
        failures = 1
    return failures
Exemple #38
0
    def get(cls, **kwargs) -> dict:
        """
        Build the SLO report for a product.

        Reads ``report_type`` and ``product_id`` from ``kwargs``. The report window ends at the current UTC
        time and starts 7 days earlier for ``weekly``, 1 month earlier for ``monthly`` and 3 months earlier
        for any other supported type; the summary unit is ``day`` for weekly reports and ``week`` otherwise.

        :return: Dict with product / product group details and the ``slo`` summary.
        :raises ProblemException: With status 404 when ``report_type`` is not in ``REPORT_TYPES``.
        """
        span = extract_span_from_kwargs(**kwargs)

        report_type = kwargs.get('report_type')
        if report_type not in REPORT_TYPES:
            raise ProblemException(
                status=404,
                title='Resource not found',
                detail='Report type ({}) is invalid. Supported types are: {}'.format(report_type, REPORT_TYPES))

        product_id = kwargs.get('product_id')
        product = Product.query.get_or_404(product_id)

        objectives = product.objectives.all()

        end = datetime.utcnow()

        if report_type == 'weekly':
            unit, window = 'day', relativedelta(days=7)
        elif report_type == 'monthly':
            unit, window = 'week', relativedelta(months=1)
        else:
            unit, window = 'week', relativedelta(months=3)

        begin = end - window

        span.set_tag('report_type', report_type)
        span.set_tag('product_id', product_id)
        span.set_tag('product', product.name)
        span.set_tag('product_slug', product.slug)
        span.set_tag('product_group', product.product_group.name)
        span.log_kv({'report_duration_start': begin, 'report_duration_end': end})

        summary = get_report_summary(objectives, unit, begin, end, span)

        span.log_kv({'report_objective_count': len(summary), 'objective_count': len(objectives)})

        report = {
            'product_name': product.name,
            'product_slug': product.slug,
            'product_group_name': product.product_group.name,
            'product_group_slug': product.product_group.slug,
            'department': product.product_group.department,
            'slo': summary,
        }
        return report
Exemple #39
0
    def delete_alert_definition(self, alert_definition_id: int, **kwargs) -> dict:
        """
        Delete existing alert definition.

        The request uses the client's configured timeout (``self._timeout``), like the other client calls, so
        an unresponsive server cannot block the caller indefinitely.

        :param alert_definition_id: ZMON alert definition ID.
        :type alert_definition_id: int

        :return: Alert definition dict.
        :rtype: dict
        """
        current_span = extract_span_from_kwargs(**kwargs)
        current_span.set_tag('alert_id', str(alert_definition_id))

        resp = self.session.delete(self.endpoint(ALERT_DEF, alert_definition_id), timeout=self._timeout)

        return self.json(resp)
    def notify(cls, alert, *args, **kwargs):
        """
        Post the alert to a Slack incoming webhook.

        The webhook comes from the ``webhook`` kwarg or the ``notifications.slack.webhook`` configuration.
        Optional kwargs: ``channel`` (default ``#general``), ``message`` (default: the alert subject) and
        ``repeat``. Request failures are recorded on the span from ``kwargs`` and logged, not raised.

        :param alert: Alert dict; must contain ``alert_def`` (with ``id``) and ``entity`` (with ``id``).
        :return: The ``repeat`` kwarg (default ``0``).
        :raises NotificationError: When no webhook is available.
        """
        span = extract_span_from_kwargs(**kwargs)

        definition = alert['alert_def']
        span.set_tag('alert_id', definition['id'])

        target = alert.get('entity')
        changed = alert.get('alert_changed', False)
        firing = alert.get('is_alert', False)

        span.set_tag('entity', target['id'])
        span.set_tag('alert_changed', bool(changed))
        span.set_tag('is_alert', firing)

        webhook = kwargs.get('webhook', cls._config.get('notifications.slack.webhook'))
        repeat = kwargs.get('repeat', 0)

        span.log_kv({'channel': kwargs.get('channel')})

        if not webhook:
            span.set_tag('notification_invalid', True)
            span.log_kv({'reason': 'Missing webhook!'})
            raise NotificationError('Webhook is required!')

        payload = {
            'username': '******',
            'channel': kwargs.get('channel', '#general'),
            'text': kwargs.get('message', cls._get_subject(alert)),
            'icon_emoji': ':bar_chart:',
        }

        request_headers = {
            'User-agent': get_user_agent(),
            'Content-type': 'application/json',
        }

        try:
            logger.info('Sending to %s %s', webhook, payload)
            response = requests.post(webhook, json=payload, headers=request_headers, timeout=5)
            response.raise_for_status()
        except Exception as e:
            span.set_tag('error', True)
            span.log_kv({'exception': str(e)})
            logger.exception('Slack notification failed!')

        return repeat
Exemple #41
0
    def notify(cls, alert, *args, **kwargs):
        """
        Send the alert subject as an SMS to every phone number resolved from ``args``.

        Provider settings come from the ``notifications.sms.*`` configuration; ``notifications.sms.apikey`` is
        mandatory. Sending stops at the first failing request. Failures derived from ``Exception`` are
        recorded on the span from ``kwargs`` and logged, not raised.

        Unlike the previous ``return`` inside ``finally``, exceptions that are not ``Exception`` subclasses
        (e.g. ``KeyboardInterrupt``, ``SystemExit``) are no longer swallowed.

        :param alert: Alert dict; must contain ``alert_def`` (with ``id``) and ``entity`` (with ``id``).
        :param args: Recipients, resolved through ``BaseNotification.resolve_group(args, phone=True)``.
        :return: The ``repeat`` kwarg (default ``0``).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        alert_def = alert['alert_def']
        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        provider_url = cls._config.get('notifications.sms.provider_url', SMS_PROVIDER_URL)
        phone_numbers = BaseNotification.resolve_group(args, phone=True)
        repeat = kwargs.get('repeat', 0)

        maxlen = cls._config.get('notifications.sms.maxlength', SMS_MAXLENGTH)
        message = cls._get_subject(alert, custom_message=kwargs.get('message'))[:maxlen]

        request_params = {
            'to': '',
            'key': cls._config['notifications.sms.apikey'],
            'from': cls._config.get('notifications.sms.sender', SMS_SENDER),
            'route': cls._config.get('notifications.sms.route', SMS_ROUTE),
            'message': message,
            'cost': 1,
            'message_id': 1,
        }

        try:
            if cls._config.get('notifications.sms.on', True):
                for phone in phone_numbers:
                    request_params['to'] = phone
                    # NOTE(review): verify=False disables TLS certificate verification for the provider call;
                    # kept as is to preserve behaviour, but it should be revisited.
                    r = requests.get(provider_url, params=request_params, verify=False)
                    # Mask the API key before the request URL is written to the log.
                    url_secured = r.url.replace(request_params['key'], '*' * len(request_params['key']))
                    logger.info('SMS sent: request to %s --> status: %s, response headers: %s, response body: %s',
                                url_secured, r.status_code, r.headers, r.text)
                    r.raise_for_status()
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            # ``.get`` keeps the handler itself from raising when the definition has no name.
            logger.exception('Failed to send sms for alert %s with id %s to: %s', alert_def.get('name'),
                             alert_def['id'], list(phone_numbers))

        return repeat
Exemple #42
0
def get_cluster_statefulsets(kube_client, cluster_id, alias, environment, region, infrastructure_account,
                             namespace='default', **kwargs) -> list:
    """
    Build one entity per statefulset returned by ``get_all`` for ``namespace``.

    Statefulsets whose ``spec.replicas`` equals ``0`` (or is absent) are skipped. Labels and annotations are
    merged into each entity through ``entity_labels``.

    :param kube_client: Kubernetes client exposing ``get_statefulsets``.
    :param cluster_id: Stored as ``kube_cluster`` and embedded in the entity ID.
    :param alias: Stored on each entity.
    :param environment: Stored on each entity.
    :param region: Stored on each entity.
    :param infrastructure_account: Stored on each entity.
    :param namespace: Namespace passed to ``get_all``.
    :return: List of entity dicts.
    """
    span = extract_span_from_kwargs(**kwargs)

    result = []

    for sts in get_all(kube_client, kube_client.get_statefulsets, namespace, span=span):
        manifest = sts.obj
        spec = manifest['spec']

        # Stale statefulset (scaled to zero replicas)?
        if spec.get('replicas', 0) == 0:
            continue

        pod_spec = spec.get('template', {}).get('spec', {})
        containers = pod_spec.get('containers', [])
        metadata = manifest['metadata']

        volume_claims = {
            claim['metadata']['name']: claim['status'].get('phase', 'UNKNOWN')
            for claim in spec.get('volumeClaimTemplates', [])
        }
        images = {c['name']: c.get('image', '') for c in containers if 'name' in c}

        item = {
            'id': 'statefulset-{}-{}[{}]'.format(sts.name, sts.namespace, cluster_id),
            'type': STATEFULSET_TYPE,
            'kube_cluster': cluster_id,
            'alias': alias,
            'environment': environment,
            'created_by': AGENT_TYPE,
            'infrastructure_account': infrastructure_account,
            'region': region,

            'statefulset_name': sts.name,
            'statefulset_namespace': metadata['namespace'],
            'statefulset_service_name': spec['serviceName'],

            'volume_claims': volume_claims,
            'containers': images,

            'replicas': spec.get('replicas'),
            'replicas_status': manifest['status'].get('replicas'),
            'actual_replicas': manifest['status'].get('readyReplicas'),
            'version': metadata.get('labels', {}).get('version', '')
        }

        item.update(entity_labels(manifest, 'labels', 'annotations'))

        result.append(item)

    return result
Exemple #43
0
def remove_entity(zmon_client, entity_id, **kwargs):
    """
    Delete the entity with ``entity_id`` from ZMON and report the outcome as an error count.

    The span from ``kwargs`` is tagged with the entity ID. Exceptions are recorded on the span and logged
    rather than raised.

    :param zmon_client: Client object exposing ``delete_entity``.
    :param entity_id: ID of the entity to delete.
    :return: ``0`` when ``delete_entity`` returned a truthy value, ``1`` when it returned a falsy value or
        raised.
    """
    span = extract_span_from_kwargs(**kwargs)
    span.set_tag('entity_id', entity_id)

    try:
        logger.info('Removing entity with id: {}'.format(entity_id))

        if zmon_client.delete_entity(entity_id):
            return 0

        logger.error('Failed to delete entity!')
    except Exception:
        span.set_tag('error', True)
        span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Exception while deleting entity: {}'.format(entity_id))

    # Reached both for a falsy delete result and after a handled exception.
    return 1
Exemple #44
0
    def notify(cls, alert, queue, hubot_url, message=None, repeat=0, **kwargs):
        """
        Send the alert subject to a Hubot endpoint as a form-encoded POST.

        Failures derived from ``Exception`` are recorded on the span from ``kwargs`` and logged, not raised.
        Unlike the previous ``return`` inside ``finally``, exceptions that are not ``Exception`` subclasses
        (e.g. ``KeyboardInterrupt``, ``SystemExit``) are no longer swallowed.

        :param alert: Alert dict; must contain ``alert_def`` (with ``id``) and ``entity`` (with ``id``).
        :param queue: Value sent as the ``event`` form field.
        :param hubot_url: Target URL; must not contain ``?``.
        :param message: Optional custom message passed to ``cls._get_subject``.
        :param repeat: Value returned to the caller.
        :return: ``repeat``.
        :raises ValueError: When ``hubot_url`` contains a query string marker (``?``).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        message = cls._get_subject(alert, custom_message=message)

        alert_def = alert['alert_def']
        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        if '?' in hubot_url:
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'Invalid URL!'})
            # Same exception type as before, now with a message explaining the rejection.
            raise ValueError('Invalid Hubot URL: query strings ("?") are not allowed')

        post_params = {
            'event': queue,
            'data': message,
        }

        try:
            r = requests.post(hubot_url, data=post_params)
            r.raise_for_status()
            logger.info('Notification sent: request to %s --> status: %s, response headers: %s, response body: %s',
                        hubot_url, r.status_code, r.headers, r.text)
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            # ``.get`` keeps the handler itself from raising when the definition has no name.
            logger.exception(
                'Failed to send notification for alert %s with id %s to: %s', alert_def.get('name'),
                alert_def['id'], hubot_url)

        return repeat
Exemple #45
0
def get_dynamodb_tables(region, acc, **kwargs):
    """
    List DynamoDB tables in ``region`` as ``dynamodb`` entities.

    Only tables whose ``TableStatus`` is ``ACTIVE`` or ``UPDATING`` are included. Exceptions are caught on
    purpose (the original agent policy does not allow scanning DynamoDB): a fixed message is logged and
    written to the span from ``kwargs``, and the entities built so far are returned.

    :param region: Region name for the boto3 DynamoDB client; stored on each entity.
    :param acc: Infrastructure account; stored on each entity and embedded in its ID.
    :return: List of entity dicts (possibly partial or empty).
    """
    entities = []

    try:
        dynamodb = boto3.client('dynamodb', region_name=region)
        pages = dynamodb.get_paginator('list_tables')

        def list_table_names():
            return pages.paginate(PaginationConfig={'MaxItems': MAX_PAGE}).build_full_result()['TableNames']

        for name in call_and_retry(list_table_names):
            info = call_and_retry(dynamodb.describe_table, TableName=name)['Table']

            if info['TableStatus'] not in ('ACTIVE', 'UPDATING'):
                continue

            entities.append({
                'id': entity_id('dynamodb-{}[{}:{}]'.format(info['TableName'], acc, region)),
                'region': region,
                'created_by': 'agent',
                'infrastructure_account': '{}'.format(acc),
                'type': 'dynamodb',
                'name': '{}'.format(info['TableName']),
                'arn': '{}'.format(info['TableArn'])
            })
    except Exception:
        reason = 'Got exception while listing dynamodb tables, IAM role has no access?'
        span = extract_span_from_kwargs(**kwargs)
        span.log_kv({'exception': reason})
        logger.exception(reason)

    return entities
Exemple #46
0
    def notify(cls, alert, url=None, body=None, params=None, headers=None, timeout=5, oauth2=False, include_alert=True,
               repeat=0, **kwargs):
        """
        Send the alert as a JSON HTTP POST request.

        The target is ``url`` or, when omitted, ``notifications.http.default.url``. An explicit ``url`` must be
        listed in ``notifications.http.whitelist.urls`` unless ``notifications.http.allow.all`` is set.

        Request headers are built from the configured ``notifications.http.headers`` defaults, overridden by
        ``headers``, then by the OAuth2 ``Authorization`` header (when ``oauth2``) and the ``User-Agent``.
        Neither the configured defaults nor the caller's ``headers`` dict is modified.

        Request failures are recorded on the span from ``kwargs`` and logged, not raised.

        :param alert: Alert dict; must contain ``alert_def`` (with ``id``).
        :param url: Target URL.
        :param body: Payload; sent under ``body`` next to the alert, or on its own if ``include_alert`` is off.
        :param params: Query parameters passed to ``requests.post``.
        :param headers: Extra request headers.
        :param timeout: Request timeout passed to ``requests.post``.
        :param oauth2: Add a ``Bearer`` token taken from ``tokens.get('uid')``.
        :param include_alert: Wrap ``body`` together with the alert dict.
        :param repeat: Value returned to the caller.
        :return: ``repeat``.
        :raises NotificationError: When no URL is available, the URL is not whitelisted, or it is not an
            absolute HTTP URL.
        """
        current_span = extract_span_from_kwargs(**kwargs)

        urls = cls._config.get('notifications.http.whitelist.urls', [])
        allow_any = cls._config.get('notifications.http.allow.all', False)
        default_url = cls._config.get('notifications.http.default.url', None)

        alert_def = alert['alert_def']
        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity', {})
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity.get('id'))
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        # The whitelist may be configured as a comma separated string.
        if isinstance(urls, basestring):
            urls = urls.replace(' ', '').split(',')

        if not url and not default_url:
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'Missing URL!'})
            raise NotificationError('URL is required!')

        if not url:
            url = default_url
        elif not allow_any and url not in urls:
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'URL is not in whitelist'})
            raise NotificationError('URL "{}" is not allowed. Please check worker white list URLs.'.format(url))

        if not is_absolute_http_url(url):
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'Absolute URL required!'})
            raise NotificationError('Absolute URL is required!')

        # HTTP headers: start from a copy of the configured defaults so the shared config is never mutated,
        # then let caller-supplied headers override them. The merged dict is what is actually sent.
        request_headers = dict(cls._config.get('notifications.http.headers', {}) or {})
        request_headers.update(headers or {})

        if oauth2:
            request_headers['Authorization'] = 'Bearer {}'.format(tokens.get('uid'))

        request_headers['User-Agent'] = get_user_agent()

        if include_alert:
            data = {
                'alert': alert,
                'body': body,
            }
        else:
            data = body

        try:
            logger.info('Sending HTTP POST request to {}'.format(url))
            r = requests.post(url, data=json.dumps(data, cls=JsonDataEncoder), params=params,
                              headers=request_headers, timeout=timeout)

            r.raise_for_status()
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Request failed!')

        return repeat
Exemple #47
0
    def notify(cls, alert, *args, **kwargs):
        """
        Send the alert as an email to the recipients resolved from ``args``.

        Nothing is sent when ``notifications.mail.on`` is disabled, or when the alert did not change and
        ``per_entity`` is off. The plain text body is rendered from ``alert.txt``; with ``html`` enabled an
        ``alert.html`` alternative is attached as well.

        Recognised kwargs: ``repeat``, ``per_entity``, ``subject``, ``html``, ``cc``, ``hide_recipients``,
        ``include_value``, ``include_definition``, ``include_captures``, ``include_entity``, ``zmon_host``.

        Every failure derived from ``Exception`` (template rendering, message assembly, SMTP connection,
        authentication, sending) is recorded on the span from ``kwargs`` and logged; none is raised. Unlike
        the previous ``return`` inside ``finally``, errors are no longer dropped silently and exceptions that
        are not ``Exception`` subclasses (e.g. ``KeyboardInterrupt``) propagate.

        :param alert: Alert dict; must contain ``alert_def`` (with ``id``).
        :param args: Recipients, resolved through ``BaseNotification.resolve_group``.
        :return: The ``repeat`` kwarg (default ``0``).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        repeat = kwargs.get('repeat', 0)
        alert_def = alert['alert_def']
        per_entity = kwargs.get('per_entity', True)

        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity', {})
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity.get('id'))
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        if not cls._config.get('notifications.mail.on', True):
            current_span.set_tag('mail_enabled', False)
            logger.info('Not sending email for alert: {}. Mail notification is not enabled.'.format(alert_def['id']))
            return repeat

        if not is_changed and not per_entity:
            return repeat

        sender = cls._config.get('notifications.mail.sender')
        subject = cls._get_subject(alert, custom_message=kwargs.get('subject'))
        html = kwargs.get('html', False)

        # Accept a single address as well as a list/tuple of addresses.
        cc = kwargs.get('cc', [])
        if isinstance(cc, (list, tuple)):
            cc = list(cc)
        else:
            cc = [cc]

        hide_recipients = kwargs.get('hide_recipients', True)
        include_value = kwargs.get('include_value', True)
        include_definition = kwargs.get('include_definition', True)
        include_captures = kwargs.get('include_captures', True)
        include_entity = kwargs.get('include_entity', True)
        expanded_alert_name = cls._get_expanded_alert_name(alert)

        zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
        alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_def['id'])) if zmon_host else ''

        try:
            tmpl = jinja_env.get_template('alert.txt')
            body_plain = tmpl.render(expanded_alert_name=expanded_alert_name,
                                     include_value=include_value,
                                     include_definition=include_definition,
                                     include_captures=include_captures,
                                     include_entity=include_entity,
                                     alert_url=alert_url,
                                     **alert)
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Error parsing email template for alert %s with id %s',
                             alert_def.get('name'), alert_def['id'])
            return repeat

        try:
            if html:
                current_span.set_tag('html', True)
                msg = MIMEMultipart('alternative')
                tmpl = jinja_env.get_template('alert.html')
                body_html = tmpl.render(expanded_alert_name=expanded_alert_name,
                                        include_value=include_value,
                                        include_definition=include_definition,
                                        include_captures=include_captures,
                                        include_entity=include_entity,
                                        alert_url=alert_url,
                                        **alert)
                part1 = MIMEText(body_plain.encode('utf-8'), 'plain', 'utf-8')
                part2 = MIMEText(body_html.encode('utf-8'), 'html', 'utf-8')
                msg.attach(part1)
                msg.attach(part2)
            else:
                msg = MIMEText(body_plain.encode('utf-8'), 'plain', 'utf-8')

            msg['Subject'] = subject
            msg['From'] = 'ZMON 2 <{}>'.format(sender)

            # Materialise once: the recipients are needed for both the header and the SMTP envelope.
            recipients = list(BaseNotification.resolve_group(args))

            if hide_recipients:
                msg['To'] = 'Undisclosed Recipients <{}>'.format(sender)
                msg['Bcc'] = ', '.join(recipients)
            else:
                msg['To'] = ', '.join(recipients)
            msg['Cc'] = ', '.join(cc)

            mail_host = cls._config.get('notifications.mail.host', 'localhost')
            mail_port = cls._config.get('notifications.mail.port', '25')

            try:
                if mail_host != 'localhost':
                    if cls._config.get('notifications.mail.tls', False):

                        logger.info('Mail notification using TLS!')
                        current_span.set_tag('tls', True)

                        s = smtplib.SMTP(mail_host, mail_port)
                        s.ehlo()
                        if not s.has_extn('STARTTLS'):
                            raise NotificationError('Mail server ({}) does not support TLS!'.format(mail_host))
                        s.starttls()
                        s.ehlo()
                    else:
                        # NOTE(review): this branch tags tls=False yet connects with SMTP_SSL - confirm intent.
                        current_span.set_tag('tls', False)
                        s = smtplib.SMTP_SSL(mail_host, mail_port)
                else:
                    s = smtplib.SMTP(mail_host, mail_port)

            except Exception:
                current_span.set_tag('error', True)
                current_span.log_kv({'exception': traceback.format_exc()})
                logger.exception('Error connecting to SMTP server %s for alert %s with id %s',
                                 mail_host, alert_def.get('name'), alert_def['id'])
            else:
                try:
                    mail_user = cls._config.get('notifications.mail.user', None)
                    if mail_user is not None:
                        s.login(mail_user, cls._config.get('notifications.mail.password'))

                    s.sendmail(sender, recipients + cc, msg.as_string())
                except SMTPAuthenticationError:
                    current_span.set_tag('error', True)
                    current_span.log_kv({'exception': traceback.format_exc()})
                    logger.exception(
                        'Error sending email for alert %s with id %s: authentication failed for %s',
                        alert_def.get('name'), alert_def['id'], mail_user)
                except Exception:
                    current_span.set_tag('error', True)
                    current_span.log_kv({'exception': traceback.format_exc()})
                    logger.exception(
                        'Error sending email for alert %s with id %s', alert_def.get('name'), alert_def['id'])
                finally:
                    s.quit()
        except Exception:
            # Anything else that goes wrong while building or sending the message (HTML template errors,
            # malformed recipients, a failing ``quit``) used to vanish silently; record it instead.
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Error preparing or sending email for alert %s with id %s',
                             alert_def.get('name'), alert_def['id'])

        return repeat
Exemple #48
0
def get_running_apps(region, existing_entities=None, **kwargs):
    """
    Discover running EC2 instances in ``region`` and describe each one as an entity dict.

    Only instances whose state is ``running`` are returned. An instance already present in
    ``existing_entities`` (matched on ``aws_id``) is re-used unchanged unless the current minute is a
    multiple of 7, in which case it is rebuilt from the EC2 API response. For entities carrying an
    ``application_id``, events and block devices are re-queried when the current minute is a multiple
    of 10; otherwise the events of the previously known entity are carried over. Finally the image name
    and creation date are added to every entity that references an image ID.

    :param region: AWS region name used to create the EC2 client.
    :param existing_entities: Optional iterable of previously discovered entity dicts.
    :param kwargs: Forwarded to ``extract_span_from_kwargs`` when an image lookup error has to be recorded.

    :return: List of instance entity dicts.
    :rtype: list
    """
    aws_client = boto3.client('ec2', region_name=region)

    paginator = aws_client.get_paginator('describe_instances')
    rs = call_and_retry(
        lambda: paginator.paginate(PaginationConfig={'MaxItems': MAX_PAGE}).build_full_result()['Reservations'])

    now = datetime.now()

    existing_instances = (
        {e['aws_id']: e for e in existing_entities if e['type'] == 'instance'} if existing_entities else {}
    )

    result = []
    images = set()

    for r in rs:
        owner = r['OwnerId']

        for i in r['Instances']:
            if str(i['State']['Name']) != 'running':
                continue

            if (now.minute % 7) and i['InstanceId'] in existing_instances:
                ins = existing_instances[i['InstanceId']]
                # Entities built below store an empty ``image`` dict when the instance has no image ID,
                # so the ``id`` key must be checked as well before it is read.
                if 'image' in ins and 'id' in ins['image']:
                    images.add(ins['image']['id'])
            else:
                user_data = None
                try:
                    user_data_response = call_and_retry(aws_client.describe_instance_attribute,
                                                        InstanceId=i['InstanceId'],
                                                        Attribute='userData')

                    user_data = base64.b64decode(user_data_response['UserData']['Value'])
                    user_data = yaml.safe_load(user_data)
                except Exception:
                    # Best effort: an instance without readable user data is still reported, just
                    # without the application specific properties.
                    logger.debug('Could not load user data of instance %s', i['InstanceId'], exc_info=True)

                tags = get_tags_dict(i.get('Tags', []))

                ins = {
                    'type': 'instance',
                    'created_by': 'agent',
                    'region': region,
                    'ip': i['PrivateIpAddress'],
                    'host': i['PrivateIpAddress'],
                    'instance_type': i['InstanceType'],
                    'spot_instance': i.get('InstanceLifecycle', '') == 'spot',
                    'aws_id': i['InstanceId'],
                    'infrastructure_account': 'aws:{}'.format(owner),
                }

                ins['image'] = {}
                if 'ImageId' in i:
                    images.add(i['ImageId'])
                    ins['image'] = {'id': i['ImageId']}

                ins['block_devices'] = get_instance_devices(aws_client, i)

                public_ip = i.get('PublicIpAddress')
                if public_ip is not None and public_ip != '':
                    ins['public_ip'] = public_ip

                # for now limit us to instances with valid user data ( senza/taupage )
                if isinstance(user_data, dict) and 'application_id' in user_data:
                    ins['state_reason'] = i['StateTransitionReason']

                    ins['events'] = []

                    stack_version = user_data.get('application_version', 'NOT_SET')
                    if 'StackVersion' in tags:
                        # The ``Name`` tag is optional (see the fallback in the branch below), so do not
                        # fail the whole discovery run when it is missing.
                        if 'Name' in tags:
                            ins['stack'] = tags['Name']
                        stack_version = tags['StackVersion']
                        if 'aws:cloudformation:logical-id' in tags:
                            ins['resource_id'] = tags['aws:cloudformation:logical-id']

                    ins['id'] = entity_id('{}-{}-{}[aws:{}:{}]'.format(user_data['application_id'],
                                                                       stack_version,
                                                                       get_hash(i['PrivateIpAddress'] + ''),
                                                                       owner,
                                                                       region))

                    ins['application_id'] = user_data['application_id']

                    if 'application_version' in user_data:
                        ins['application_version'] = user_data['application_version']

                    ins['source'] = user_data['source']
                    ins['source_base'] = ins['source'].split(":")[0]

                    if 'ports' in user_data:
                        ins['ports'] = user_data['ports']

                    ins['runtime'] = user_data['runtime']

                    # `tags` is already a dict, but we need the raw list
                    assign_properties_from_tags(ins, i.get('Tags', []))

                    add_traffic_tags_to_entity(ins)

                    zlogging = user_data.get('logging', {})

                    ins['fluentd_enabled'] = 'false'

                    if zlogging.get('fluentd_enabled') is True:
                        ins['fluentd_enabled'] = 'true'

                else:
                    ins['id'] = entity_id('{}-{}[aws:{}:{}]'.format(tags.get('Name') or i['InstanceId'],
                                                                    get_hash(i['PrivateIpAddress'] + ''),
                                                                    owner, region))

                    # `tags` is already a dict, but we need the raw list
                    assign_properties_from_tags(ins, i.get('Tags', []))

                    if 'Name' in tags:
                        ins['name'] = tags['Name'].replace(' ', '-')

            if 'application_id' in ins:
                if not (now.minute % 10):
                    ins['events'] = get_instance_events(aws_client, i)
                    ins['block_devices'] = get_instance_devices(aws_client, i)
                else:
                    previous = existing_instances.get(ins.get('aws_id', None), None)
                    if previous and 'events' in previous:
                        ins['events'] = previous['events']

            result.append(ins)

    # prevent fetching all images (in case the images is empty, it will do so):
    if images:
        try:
            imgs = aws_client.describe_images(ImageIds=list(images))['Images']

            # Index the descriptions once; the first description of an image ID wins.
            images_by_id = {}
            for img in imgs:
                images_by_id.setdefault(img['ImageId'], img)

            for entity in result:
                if 'image' not in entity or 'id' not in entity['image']:
                    continue
                img = images_by_id.get(entity['image']['id'])
                if img is None:
                    continue
                entity['image']['name'] = img.get('Name', 'UNKNOWN')
                date = img.get('CreationDate', '1970-01-01T00:00:00.000+00:00').replace('Z', '+00:00')
                entity['image']['date'] = date
        except Exception:
            current_span = extract_span_from_kwargs(**kwargs)
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Failed to retrieve image descriptions')

    return result
Exemple #49
0
    def notify(cls,
               alert,
               teams=None,
               per_entity=False,
               include_alert=True,
               include_captures=False,
               priority=None,
               message='',
               description='',
               custom_fields=None,
               **kwargs):
        """
        Create or close an Opsgenie alert for a ZMON alert evaluation.

        When ``alert['is_alert']`` is truthy an alert is posted to the Opsgenie alerts endpoint, otherwise
        the alert with the same alias is closed. Nothing is sent when the alert state did not change and
        ``per_entity`` is false. HTTP and connection errors are logged and tagged on the tracing span but
        are not raised.

        :param alert: Alert dict; ``alert_def``, ``entity`` and ``worker`` are read from it.
        :param teams: Team name or list of team names.
        :param per_entity: Use one Opsgenie alias per entity instead of one per alert definition.
        :param include_alert: Add alert details (worker, team, entity, account, alert URL) to ``details``.
        :param include_captures: Add the alert captures to ``details``.
        :param priority: One of ``PRIORITIES``. Defaults to ``P1`` for alert priority 1, otherwise ``P3``.
        :param message: Alert message; defaults to the subject built by ``cls._get_subject``.
        :param description: Alert description.
        :param custom_fields: Optional dict merged into ``details``.
        :param kwargs: ``repeat``, ``api_key`` and ``zmon_host`` are read from here; the whole dict is also
                       passed to ``extract_span_from_kwargs``.

        :return: The ``repeat`` value from ``kwargs`` (``0`` if absent).

        :raises NotificationError: If the API key is missing, ``teams`` is neither a string nor a list, or
                                   ``priority`` is not a valid priority.
        """
        current_span = extract_span_from_kwargs(**kwargs)

        url = 'https://api.opsgenie.com/v2/alerts'

        repeat = kwargs.get('repeat', 0)

        # Auth key!
        api_key = kwargs.get('api_key', cls._config.get('notifications.opsgenie.apikey'))
        zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        alert_def = alert['alert_def']
        alert_id = alert_def['id']
        current_span.set_tag('alert_id', alert_id)

        if not api_key:
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'API key is required!'})
            raise NotificationError('API key is required!')

        if not isinstance(teams, (list, basestring)):
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'Missing team!'})
            raise NotificationError('Missing "teams" parameter. Either a team name or list of team names is required.')

        current_span.log_kv({'teams': teams})

        if priority and priority not in PRIORITIES:
            current_span.set_tag('notification_invalid', True)
            current_span.log_kv({'reason': 'Invalid priorities'})
            raise NotificationError('Invalid priority. Valid values are: {}'.format(PRIORITIES))

        if teams and isinstance(teams, basestring):
            teams = [{'name': teams}]
        else:
            teams = [{'name': t} for t in teams]

        if not is_changed and not per_entity:
            return repeat

        alias = 'ZMON-{}'.format(alert_id) if not per_entity else 'ZMON-{}-{}'.format(alert_id, entity['id'])

        note = alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id)) if zmon_host else ''

        if not priority:
            priority = 'P1' if int(alert_def['priority']) == 1 else 'P3'

        responsible_team = alert_def.get('responsible_team', teams[0]['name'])
        msg = message if message else cls._get_subject(alert, include_event=False)

        details = {
            'alert_evaluation_ts': alert.get('alert_evaluation_ts', time.time())
        }

        alert_details = {
            'worker': alert['worker'],
            'zmon_team': alert_def['team'],
            'entity': entity['id'],
            'infrastructure_account': entity.get('infrastructure_account', 'UNKNOWN'),
            'alert_url': alert_url,
        }

        params = {}

        if is_alert:
            # Work on a copy: appending to the list stored on the alert definition would add the alert
            # ID again on every notification. ``or []`` also covers an explicit ``None`` value.
            tags = list(alert_def.get('tags') or [])
            tags.append(alert_id)
            data = {
                'alias': alias,
                'teams': teams,
                'message': '[{}] - {}'.format(responsible_team, msg),  # TODO: remove when it is no longer needed!
                'source': alert.get('worker', ''),
                'description': description,
                'entity': entity['id'],
                'note': note,
                'priority': priority,
                'tags': tags,
                'details': details,
            }

            if isinstance(custom_fields, dict):
                data['details'].update(custom_fields)

            if include_alert:
                data['details'].update(alert_details)
            if include_captures:
                # An alert without captures must not break the notification.
                data['details'].update(alert.get('captures') or {})
        else:
            logger.info('Closing Opsgenie alert {}'.format(alias))

            url = 'https://api.opsgenie.com/v2/alerts/{}/close'.format(alias)
            data = {
                'user': '******',
                'source': alert.get('worker', 'ZMON Worker'),
                'note': note,
            }

            # The close endpoint is addressed by alias instead of the Opsgenie alert ID.
            params = {'identifierType': 'alias'}

        try:
            logger.info('Notifying Opsgenie %s %s', url, message)
            headers = {
                'User-Agent': get_user_agent(),
                'Content-type': 'application/json',
                'Authorization': 'GenieKey {}'.format(api_key),
            }

            r = requests.post(url, data=json.dumps(data, cls=JsonDataEncoder, sort_keys=True), headers=headers,
                              timeout=5, params=params)

            r.raise_for_status()
        except requests.HTTPError as e:
            current_span.set_tag('error', True)
            logger.error('HTTP Error ({}) {}'.format(e.response.status_code, e.response.text))
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Notifying Opsgenie failed')

        return repeat
    def notify(cls, alert, *args, **kwargs):
        """
        Post an alert notification card to a Google Hangouts Chat webhook.

        The alert ID is added to the webhook URL as ``threadKey``. Request failures are logged and tagged
        on the tracing span but are not raised.

        :param alert: Alert dict; ``alert_def``, ``entity``, ``alert_changed`` and ``is_alert`` are read.
        :param args: Unused.
        :param kwargs: ``webhook_link``, ``multiline``, ``repeat``, ``room``, ``color``, ``logo``,
                       ``message`` and ``zmon_host`` are read from here; the whole dict is also passed to
                       ``extract_span_from_kwargs``.

        :return: The ``repeat`` value from ``kwargs`` (``0`` if absent).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        webhook_link = kwargs.get('webhook_link', 'http://no.webhook.link?wrong')
        multiline = kwargs.get('multiline', True)
        alert_def = alert['alert_def']
        alert_id = alert_def['id']

        # partition() keeps the complete query string and also copes with a link that has none, where
        # indexing the result of split('?') would raise IndexError.
        webhook_base, _, webhook_query = webhook_link.partition('?')
        webhook_link = webhook_base + '?threadKey={}&'.format(alert_id) + webhook_query

        repeat = kwargs.get('repeat', 0)

        current_span.set_tag('alert_id', alert_id)

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        current_span.log_kv({'room': kwargs.get('room')})

        # An ended alert always uses the fixed colour and icon below; a caller supplied ``color`` or
        # ``logo`` only applies while the alert is active.
        if is_alert:
            color = kwargs.get('color', '#FF0000')
            logo = kwargs.get('logo', 'FLIGHT_DEPARTURE')
        else:
            color = '#0CB307'
            logo = 'FLIGHT_ARRIVAL'

        message_text = cls._get_subject(alert, custom_message=kwargs.get('message'))

        zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
        alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id)) if zmon_host else ''

        message = {
            "cards": [
                {
                    "sections": [
                        {
                            "widgets": [
                                {
                                    "keyValue": {
                                        "content": '<font color="{}">{}!</font>'.format(color, message_text),
                                        "contentMultiline": multiline,
                                        "onClick": {
                                            "openLink": {
                                                "url": "{}".format(alert_url)
                                            }
                                        },
                                        "icon": "{}".format(logo)
                                    }
                                }
                            ]
                        }
                    ]
                }
            ]
        }

        try:
            # NOTE(review): the webhook URL may carry credentials in its query string and is written to
            # the log here - confirm this is acceptable.
            logger.info('Sending to: %s %s', webhook_link, json.dumps(message))
            r = requests.post(
                webhook_link,
                json=message,
                headers={'Content-type': 'application/json'},
                timeout=5)
            r.raise_for_status()
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Google Hangouts Chat write failed!')

        return repeat
Exemple #51
0
def get_sqs_queues(region, acc, all_entities=None, **kwargs):
    """
    Discover the SQS queues of a region and describe each one as an ``aws_sqs`` entity dict.

    A queue that is already present in ``all_entities`` (matched on its URL) is re-used unchanged unless
    the current minute is a multiple of 15; otherwise its attributes are queried again. A failure for a
    single queue is logged and that queue is skipped. A failure to list the queues is logged, as a
    warning only when access is denied.

    :param region: AWS region name used to create the SQS client.
    :param acc: Infrastructure account stored on every entity and embedded in its ID.
    :param all_entities: Optional list of previously discovered entity dicts.
    :param kwargs: Passed to ``extract_span_from_kwargs`` to obtain the tracing span.

    :return: List of SQS queue entity dicts.
    :rtype: list
    """
    current_span = extract_span_from_kwargs(**kwargs)
    known_entities = [] if all_entities is None else all_entities
    sqs_queues = []

    try:
        sqs_client = boto3.client('sqs', region_name=region)
        listing = call_and_retry(sqs_client.list_queues) or {}

        # Previously discovered queues, keyed by queue URL.
        cached_by_url = {}
        for known in known_entities:
            if known['type'] == 'aws_sqs':
                cached_by_url[known['url']] = known

        for queue_url in listing.get('QueueUrls', []):
            try:
                cached = cached_by_url.get(queue_url)
                if cached and (datetime.now().minute % 15):
                    sqs_queues.append(cached)
                    continue

                attributes = call_and_retry(sqs_client.get_queue_attributes, QueueUrl=queue_url,
                                            AttributeNames=['All'])['Attributes']
                queue_arn = attributes['QueueArn']
                arn_parts = queue_arn.split(':')
                if len(arn_parts) != 6:
                    logger.error('Illegal SQS queue ARN: "%s" while processing url %s', queue_arn, queue_url)
                    continue
                queue_name = arn_parts[-1]

                sqs_entity = {
                    'id': entity_id('sqs-{}[{}:{}]'.format(queue_name, acc, region)),
                    'created_by': 'agent',
                    'infrastructure_account': acc,
                    'region': region,
                    'type': 'aws_sqs',
                    'name': queue_name,
                    'url': queue_url,
                    'arn': queue_arn,
                    'message_retention_period_seconds': int(attributes.get('MessageRetentionPeriod', 345600)),
                    'maximum_message_size_bytes': int(attributes.get('MaximumMessageSize', 262144)),
                    'receive_messages_wait_time_seconds': int(attributes.get('ReceiveMessageWaitTimeSeconds', 0)),
                    'delay_seconds': int(attributes.get('DelaySeconds', 0)),
                    'visibility_timeout_seconds': int(attributes.get('VisibilityTimeout', 30)),
                }

                # Copy the redrive policy settings that are present and truthy.
                redrive_policy = json.loads(attributes.get('RedrivePolicy', '{}'))
                for entity_key, policy_key in (
                        ('redrive_policy_dead_letter_target_arn', 'deadLetterTargetArn'),
                        ('redrive_policy_max_receive_count', 'maxReceiveCount')):
                    policy_value = redrive_policy.get(policy_key, None)
                    if policy_value:
                        sqs_entity[entity_key] = policy_value

                source_urls = call_and_retry(sqs_client.list_dead_letter_source_queues,
                                             QueueUrl=queue_url).get('queueUrls', None)
                if source_urls:
                    sqs_entity['redrive_policy_dead_letter_source_urls'] = source_urls

                sqs_queues.append(sqs_entity)
            except Exception:
                current_span.set_tag('error', True)
                current_span.log_kv({'exception': traceback.format_exc()})
                logger.exception('Failed to obtain details about queue with url="%s"', queue_url)
    except Exception as err:
        access_denied = isinstance(err, ClientError) and err.response['Error']['Code'] == 'AccessDenied'
        if access_denied:
            logger.warning('Access to AWS SQS denied. Skip queue discovery.')
        else:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Failed to list SQS queues.')

    return sqs_queues
Exemple #52
0
def get_limits(region, acc, apps, elbs, entities, **kwargs):
    """
    Build the ``aws_limits`` entity holding account limits and current usage for a region.

    Starts from built-in defaults, overlays the first ``aws_limits`` entity found in ``entities``, adds
    usage counted from ``apps`` and ``elbs``, and then queries EC2, RDS, Auto Scaling and IAM for their
    limits. Each of the four queries is independent: a failure is logged and tagged on the tracing span,
    and the values collected so far are kept.

    :param region: AWS region name used to create the clients.
    :param acc: Infrastructure account stored on the entity and embedded in its ID.
    :param apps: Entity dicts; those of type ``instance`` are counted as spot or non-spot instances.
    :param elbs: Collection whose length is reported as ``elb-used-count``.
    :param entities: Previously discovered entity dicts.
    :param kwargs: Passed to ``extract_span_from_kwargs`` to obtain the tracing span.

    :return: The limits entity dict.
    :rtype: dict
    """
    current_span = extract_span_from_kwargs(**kwargs)

    def record_failure(log_message):
        # Must be called from inside an ``except`` block: it reads the active exception.
        current_span.set_tag('error', True)
        current_span.log_kv({'exception': traceback.format_exc()})
        logger.exception(log_message)

    limits = {
        'ec2-max-instances': 20,
        'ec2-max-spot-instances': 20,  # Assume default max-spot-instances
        'elb-max-count': 20,
    }

    # Overlay the previously stored limits entity, if there is one.
    previous = next((known for known in entities if known.get('type') == 'aws_limits'), None)
    if previous is not None:
        limits.update(previous)

    limits['ec2-used-instances'] = sum(
        1 for app in apps if app['type'] == 'instance' and not app.get('spot_instance', False))
    limits['ec2-used-spot-instances'] = sum(
        1 for app in apps if app['type'] == 'instance' and app.get('spot_instance', False))
    limits['elb-used-count'] = len(elbs)

    ec2 = boto3.client('ec2', region_name=region)
    rds = boto3.client('rds', region_name=region)
    asg = boto3.client('autoscaling', region_name=region)
    iam = boto3.client('iam', region_name=region)

    try:
        for attribute in ec2.describe_account_attributes()['AccountAttributes']:
            if attribute['AttributeName'] == 'max-instances':
                limits['ec2-max-instances'] = int(attribute['AttributeValues'][0]['AttributeValue'])
    except Exception:
        record_failure('Failed to query EC2 account attributes!')

    try:
        quota_names = ('ReservedDBInstances', 'AllocatedStorage')
        quotas_by_name = {}
        for quota in rds.describe_account_attributes()['AccountQuotas']:
            if quota['AccountQuotaName'] in quota_names:
                quotas_by_name[quota['AccountQuotaName']] = quota

        for limit_key, quota_name, field in (
                ('rds-max-reserved', 'ReservedDBInstances', 'Max'),
                ('rds-used-reserved', 'ReservedDBInstances', 'Used'),
                ('rds-max-allocated', 'AllocatedStorage', 'Max'),
                ('rds-used-allocated', 'AllocatedStorage', 'Used')):
            limits[limit_key] = quotas_by_name[quota_name][field]
    except Exception:
        record_failure('Failed to query RDS account attributes!')

    try:
        asg_limits = asg.describe_account_limits()
        for limit_key, response_key in (
                ('asg-max-groups', 'MaxNumberOfAutoScalingGroups'),
                ('asg-max-launch-configurations', 'MaxNumberOfLaunchConfigurations'),
                ('asg-used-groups', 'NumberOfAutoScalingGroups'),
                ('asg-used-launch-configurations', 'NumberOfLaunchConfigurations')):
            limits[limit_key] = asg_limits[response_key]
    except Exception:
        record_failure('Failed to query ASG limits!')

    try:
        iam_summary = iam.get_account_summary()['SummaryMap']
        for limit_key, summary_key in (
                ('iam-used-server-certificates', 'ServerCertificates'),
                ('iam-max-server-certificates', 'ServerCertificatesQuota'),
                ('iam-used-instance-profiles', 'InstanceProfiles'),
                ('iam-max-instance-profiles', 'InstanceProfilesQuota'),
                ('iam-used-policies', 'Policies'),
                ('iam-max-policies', 'PoliciesQuota')):
            limits[limit_key] = iam_summary[summary_key]
    except Exception:
        record_failure('Failed to query IAM account summary!')

    entity = {
        'id': entity_id('aws-limits[{}:{}]'.format(acc, region)),
        'type': 'aws_limits',
        'created_by': 'agent',
        'region': region,
        'infrastructure_account': acc,
    }

    # Collected limits are applied last, so keys they share with the skeleton above take precedence.
    entity.update(limits)

    return entity