Code example #1
    def check(self, instance):
        instance_name = instance.get("name")
        if instance_name is None:
            raise Exception("Each instance must have a unique name")

        server = instance.get("server")
        if server is None:
            raise Exception("Each instance must have a server")

        # Check the server URL for HTTP or HTTPS designation,
        #   fall back to http:// if no scheme present (allows for backwards compatibility).
        server = self._normalize_server_url(server)

        build_conf = instance.get("build_configuration")
        if build_conf is None:
            raise Exception("Each instance must have a build configuration")

        host = instance.get("host_affected") or self.hostname
        tags = instance.get("tags")
        is_deployment = _is_affirmative(instance.get("is_deployment", False))
        basic_http_authentication = _is_affirmative(
            instance.get("basic_http_authentication", False))

        self._initialize_if_required(instance_name, server, build_conf,
                                     basic_http_authentication)

        # Look for new successful builds
        if basic_http_authentication:
            new_build_url = self.NEW_BUILD_URL_AUTHENTICATED.format(
                server=server,
                build_conf=build_conf,
                since_build=self.last_build_ids[instance_name])
        else:
            new_build_url = self.NEW_BUILD_URL.format(
                server=server,
                build_conf=build_conf,
                since_build=self.last_build_ids[instance_name])

        try:
            resp = self.http.get(new_build_url)
            resp.raise_for_status()

            new_builds = resp.json()

            if new_builds["count"] == 0:
                self.log.debug("No new builds found.")
            else:
                self._build_and_send_event(new_builds["build"][0],
                                           instance_name, is_deployment, host,
                                           tags)
        except requests.exceptions.HTTPError:
            self.log.exception("Couldn't fetch last build, got code %s",
                               resp.status_code)
            raise
        except Exception:
            self.log.exception(
                "Couldn't fetch last build, unhandled exception")
            raise
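
All of the examples on this page gate optional boolean settings through `_is_affirmative`, which the checks import from the DataDog base package. As a rough reference only, a minimal sketch of such a helper is shown below; the exact set of accepted values in the real implementation may differ.

    def _is_affirmative(value):
        # Sketch only: treat None as False, pass booleans/ints through,
        # and accept the truthy strings commonly used in YAML configs.
        if value is None:
            return False
        if isinstance(value, (bool, int)):
            return bool(value)
        return str(value).strip().lower() in ('yes', 'true', '1', 'on')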
Code example #2
    def check(self, _):
        url = self.instance.get("url")
        custom_tags = self.instance.get('tags', [])
        max_queues = int(self.instance.get("max_queues", MAX_ELEMENTS))
        max_topics = int(self.instance.get("max_topics", MAX_ELEMENTS))
        max_subscribers = int(
            self.instance.get("max_subscribers", MAX_ELEMENTS))
        detailed_queues = self.instance.get("detailed_queues", [])
        detailed_topics = self.instance.get("detailed_topics", [])
        detailed_subscribers = self.instance.get("detailed_subscribers", [])
        suppress_errors = _is_affirmative(
            self.instance.get("suppress_errors", False))

        tags = custom_tags + ["url:{0}".format(url)]

        self.log.debug("Processing ActiveMQ data for %s", url)
        data = self._fetch_data(url, QUEUE_URL, suppress_errors)
        if data:
            self._process_data(data, "queue", tags, max_queues,
                               detailed_queues)

        data = self._fetch_data(url, TOPIC_URL, suppress_errors)
        if data:
            self._process_data(data, "topic", tags, max_topics,
                               detailed_topics)

        data = self._fetch_data(url, SUBSCRIBER_URL, suppress_errors)
        if data:
            self._process_subscriber_data(data, tags, max_subscribers,
                                          detailed_subscribers)
Code example #3
    def _collect_raw(self, ceph_cmd, ceph_cluster, instance):
        use_sudo = _is_affirmative(instance.get('use_sudo', False))
        if use_sudo:
            test_sudo = os.system('setsid sudo -l < /dev/null')
            if test_sudo != 0:
                raise Exception('The dd-agent user does not have sudo access')
            ceph_args = 'sudo {}'.format(ceph_cmd)
        else:
            ceph_args = ceph_cmd

        ceph_args = '{} --cluster {}'.format(ceph_args, ceph_cluster)

        raw = {}
        for cmd in ('mon_status', 'status', 'df detail', 'osd pool stats',
                    'osd perf', 'health detail'):
            try:
                args = '{} {} -fjson'.format(ceph_args, cmd)
                output, _, _ = get_subprocess_output(args.split(), self.log)
                res = json.loads(output)
            except Exception as e:
                self.log.warning('Unable to parse data from cmd=%s: %s', cmd,
                                 e)
                continue

            name = cmd.replace(' ', '_')
            raw[name] = res

        return raw
Code example #4
    def build_resource_filters(raw_filters):
        # type: (List[Dict[str, Any]]) -> Dict[str, List[ResourceFilter]]
        created_filters = {
            'included': [],
            'excluded': []
        }  # type: Dict[str, List[ResourceFilter]]
        for f in raw_filters:
            included = _is_affirmative(f.get('include', True))

            if f.get('pattern') is None or f.get('resource_type') is None:
                raise ConfigurationError(
                    'A resource filter requires at least a pattern and a resource_type'
                )
            if f['resource_type'] not in ALLOWED_RESOURCES_FOR_FILTERS:
                raise ConfigurationError('Unknown resource_type: {}'.format(
                    f['resource_type']))

            regex = re.compile(f['pattern'])
            if included:
                created_filters['included'].append(
                    ResourceFilter(f['resource_type'], regex, True,
                                   f.get('group')))
            else:
                created_filters['excluded'].append(
                    ResourceFilter(f['resource_type'], regex, False,
                                   f.get('group')))

        return created_filters
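
For illustration, a hedged usage sketch of `build_resource_filters` follows. The `raw_filters` entries are hypothetical, and their `resource_type` values would need to appear in `ALLOWED_RESOURCES_FOR_FILTERS` for the call to succeed.

    # Hypothetical configuration, mirroring the keys the function reads:
    # pattern, resource_type, include (optional) and group (optional).
    raw_filters = [
        {'pattern': '^prod-', 'resource_type': 'node'},
        {'pattern': 'canary', 'resource_type': 'node', 'include': 'false'},
    ]
    filters = build_resource_filters(raw_filters)
    # filters['included'] and filters['excluded'] now hold ResourceFilter
    # objects that the check can match resource names against; the second
    # entry above lands in 'excluded' because its include flag is falsy.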
Code example #5
    def __init__(self, instance):
        # type: (Dict[str, Any]) -> None
        self.url = instance.get('url', '')  # type: str
        if self.url == '':
            raise ConfigurationError("url is a required configuration.")
        self.tags = instance.get('tags', [])
        self.enable_health_service_checks = _is_affirmative(
            instance.get('enable_health_service_checks', False))
        self.resource_filters = self.build_resource_filters(
            instance.get('resource_filters', []))
Code example #6
File: ceph.py  Project: DataDog/integrations-core
    def _collect_raw(self, ceph_cmd, ceph_cluster, instance):
        use_sudo = _is_affirmative(instance.get('use_sudo', False))
        if use_sudo:
            test_sudo = os.system('setsid sudo -l < /dev/null')
            if test_sudo != 0:
                raise CheckException(
                    'The dd-agent user does not have sudo access')
            ceph_args = 'sudo {}'.format(ceph_cmd)
        else:
            ceph_args = ceph_cmd

        ceph_args = '{} --cluster {}'.format(ceph_args, ceph_cluster)

        raw = {}
        for cmd in ('mon_status', 'status', 'df detail', 'osd pool stats',
                    'osd perf', 'health detail'):
            try:
                args = '{} {} -fjson'.format(ceph_args, cmd)
                output, _, _ = get_subprocess_output(args.split(), self.log)
                res = json.loads(output)
            except Exception as e:
                self.log.warning('Unable to parse data from cmd=%s: %s', cmd,
                                 e)
                continue

            name = cmd.replace(' ', '_')
            raw[name] = res

        mon_map = raw.get('status', {}).get('monmap')
        if mon_map is None:
            raise RuntimeError("Could not detect Ceph release series")
        if mon_map.get('min_mon_release_name') == 'octopus':
            self.log.debug("Detected octopus version of ceph...")
            self._octopus = True
        else:
            self._octopus = False

        return raw
Code example #7
    def check(self, instance):
        name = instance.get('name', None)
        tags = instance.get('tags', [])
        exact_match = _is_affirmative(instance.get('exact_match', True))
        search_string = instance.get('search_string', None)
        ignore_ad = _is_affirmative(instance.get('ignore_denied_access', True))
        pid = instance.get('pid')
        pid_file = instance.get('pid_file')
        collect_children = _is_affirmative(
            instance.get('collect_children', False))
        user = instance.get('user', False)
        try_sudo = instance.get('try_sudo', False)

        if self._conflicting_procfs:
            self.warning(
                'The `procfs_path` defined in `process.yaml` is different from the one defined in '
                '`datadog.conf`. This is currently not supported by the Agent. Defaulting to the '
                'value defined in `datadog.conf`: %s',
                psutil.PROCFS_PATH,
            )
        elif self._deprecated_init_procfs:
            self.warning(
                'DEPRECATION NOTICE: Specifying `procfs_path` in `process.yaml` is deprecated. '
                'Please specify it in `datadog.conf` instead.')

        if not isinstance(search_string,
                          list) and pid is None and pid_file is None:
            raise ValueError(
                '"search_string" or "pid" or "pid_file" parameter is required')

        # FIXME 8.x remove me
        if search_string is not None:
            if "All" in search_string:
                self.warning(
                    'Deprecated: Having "All" in your search_string will greatly reduce the '
                    'performance of the check and will be removed in a future version of the agent.'
                )

        if name is None:
            raise KeyError('The "name" of process groups is mandatory')

        if search_string is not None:
            pids = self.find_pids(name,
                                  search_string,
                                  exact_match,
                                  ignore_ad=ignore_ad)
        elif pid is not None:
            # We use Process(pid) as a means to search; if the pid is not
            # found, psutil.NoSuchProcess is raised.
            pids = self._get_pid_set(pid)
        elif pid_file is not None:
            try:
                with open(pid_file, 'r') as file_pid:
                    pid_line = file_pid.readline().strip()
                    pids = self._get_pid_set(int(pid_line))
            except IOError as e:
                # pid file doesn't exist, assuming the process is not running
                self.log.debug('Unable to find pid file: %s', e)
                pids = set()
        else:
            raise ValueError(
                'The "search_string" or "pid" options are required for process identification'
            )

        if collect_children:
            pids.update(self._get_child_processes(pids))

        if user:
            pids = self._filter_by_user(user, pids)

        proc_state = self.get_process_state(name, pids, try_sudo)

        # FIXME 8.x remove the `name` tag
        tags.extend(['process_name:{}'.format(name), name])

        self.log.debug('ProcessCheck: process %s analysed', name)
        self.gauge('system.processes.number', len(pids), tags=tags)

        if len(pids) == 0:
            self.warning("No matching process '%s' was found", name)
            # reset the process caches now, something changed
            self.last_pid_cache_ts[name] = 0
            self.process_list_cache.reset()

        for attr, mname in iteritems(ATTR_TO_METRIC):
            vals = [x for x in proc_state[attr] if x is not None]
            # skip []
            if vals:
                sum_vals = sum(vals)
                if attr == 'run_time':
                    self.gauge('system.processes.{}.avg'.format(mname),
                               sum_vals / len(vals),
                               tags=tags)
                    self.gauge('system.processes.{}.max'.format(mname),
                               max(vals),
                               tags=tags)
                    self.gauge('system.processes.{}.min'.format(mname),
                               min(vals),
                               tags=tags)

                # FIXME 8.x: change this prefix?
                else:
                    self.gauge('system.processes.{}'.format(mname),
                               sum_vals,
                               tags=tags)
                    if mname in ['ioread_bytes', 'iowrite_bytes']:
                        self.monotonic_count(
                            'system.processes.{}_count'.format(mname),
                            sum_vals,
                            tags=tags)

        for attr, mname in iteritems(ATTR_TO_METRIC_RATE):
            vals = [x for x in proc_state[attr] if x is not None]
            if vals:
                self.rate('system.processes.{}'.format(mname),
                          sum(vals),
                          tags=tags)

        self._process_service_check(name, len(pids),
                                    instance.get('thresholds', None), tags)
Code example #8
    def check(self, _):
        aci_url = self.instance.get('aci_url')
        aci_urls = self.instance.get('aci_urls', [])
        if aci_url:
            aci_urls.append(aci_url)

        if not aci_urls:
            raise ConfigurationError(
                "The Cisco ACI check requires at least one url")

        username = self.instance['username']
        pwd = self.instance.get('pwd')
        instance_hash = hash_mutable(self.instance)

        appcenter = _is_affirmative(self.instance.get('appcenter'))

        cert_key = self.instance.get('cert_key')
        if not cert_key and self.instance.get('cert_key_path'):
            with open(self.instance.get('cert_key_path'), 'rb') as f:
                cert_key = f.read()

        cert_name = self.instance.get('cert_name')
        if not cert_name:
            cert_name = username

        cert_key_password = self.instance.get('cert_key_password')

        if instance_hash in self._api_cache:
            api = self._api_cache.get(instance_hash)
        else:
            api = Api(
                aci_urls,
                self.http,
                username,
                password=pwd,
                cert_name=cert_name,
                cert_key=cert_key,
                log=self.log,
                appcenter=appcenter,
                cert_key_password=cert_key_password,
            )
            self._api_cache[instance_hash] = api

        service_check_tags = []
        for url in aci_urls:
            service_check_tags.append("url:{}".format(url))
        service_check_tags.extend(self.check_tags)
        service_check_tags.extend(self.instance.get('tags', []))

        try:
            api.login()
        except Exception as e:
            self.log.error("Cannot login to the Cisco ACI: %s", e)
            self.service_check(
                SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                message="aci login returned a status of {}".format(e),
                tags=service_check_tags,
            )
            raise

        self.tagger.api = api

        try:
            tenant = Tenant(self, api, self.instance, instance_hash)
            tenant.collect()
        except Exception as e:
            self.log.error('tenant collection failed: %s', e)
            self.service_check(
                SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                message="aci tenant operations failed, returning a status of {}"
                .format(e),
                tags=service_check_tags,
            )
            api.close()
            raise

        try:
            fabric = Fabric(self, api, self.instance)
            fabric.collect()
        except Exception as e:
            self.log.error('fabric collection failed: %s', e)
            self.service_check(
                SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                message="aci fabric operations failed, returning a status of {}"
                .format(e),
                tags=service_check_tags,
            )
            api.close()
            raise

        try:
            capacity = Capacity(api,
                                self.instance,
                                check_tags=self.check_tags,
                                gauge=self.gauge,
                                log=self.log)
            capacity.collect()
        except Exception as e:
            self.log.error('capacity collection failed: %s', e)
            self.service_check(
                SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                message="aci capacity operations failed, returning a status of {}"
                .format(e),
                tags=service_check_tags,
            )
            api.close()
            raise

        self.service_check(SERVICE_CHECK_NAME,
                           AgentCheck.OK,
                           tags=service_check_tags)

        self.set_external_tags(self.get_external_host_tags())

        api.close()