def check(self, instance):
        """
        Integration logic

        Validates that ``url`` and ``token`` are present in ``instance``,
        copies the instance settings onto the check object and then runs one
        snapshot cycle (start, process topology, stop), reporting the outcome
        as a service check.

        :param instance: mapping holding the configuration of one instance.
        :raises ConfigurationError: when ``url`` or ``token`` is missing.
        """
        mandatory = (
            ('url', 'Missing URL in configuration.'),
            ('token', 'Missing API Token in configuration.'),
        )
        for setting, complaint in mandatory:
            if setting not in instance:
                raise ConfigurationError(complaint)

        read = instance.get
        self.url = read('url')
        self.token = read('token')
        self.tags = read('tags', [])
        self.domain = read('domain', 'dynatrace')
        self.environment = read('environment', 'production')
        self.verify = read('verify', True)
        self.cert = read('cert', '')
        self.keyfile = read('keyfile', '')

        try:
            self.start_snapshot()
            self.process_topology()
            # The snapshot is only closed when processing succeeded; on
            # failure it is left open (presumably on purpose - confirm).
            self.stop_snapshot()
            self.service_check(
                self.SERVICE_CHECK_NAME,
                AgentCheck.OK,
                tags=self.tags,
                message="Dynatrace topology processed successfully")
        except Exception as e:
            failure = str(e)
            self.log.exception(failure)
            self.service_check(
                self.SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                tags=self.tags,
                message=failure)
    def get_connection(self,
                       key,
                       host,
                       port,
                       user,
                       password,
                       dbname,
                       ssl,
                       connect_fct,
                       tags,
                       use_cached=True):
        """Return a connection for ``key``, creating and caching it if needed.

        A cached connection from ``self.dbs`` is returned when ``use_cached``
        is true. Otherwise ``connect_fct`` is called with arguments chosen
        from the host/port/password combination, and the result is stored in
        ``self.dbs[key]``.

        :raises ConfigurationError: when ``host`` or ``user`` is empty.
        Any error raised while connecting is reported as a CRITICAL service
        check and then re-raised unchanged.
        """
        if key in self.dbs and use_cached:
            return self.dbs[key]

        if host == "" or user == "":
            if not host:
                raise ConfigurationError(
                    'Please specify a Postgres host to connect to.')
            elif not user:
                raise ConfigurationError(
                    'Please specify a user to connect to Postgres as.')
            return None

        try:
            if host == 'localhost' and password == '':
                # Use ident method
                dsn = "user=%s dbname=%s" % (user, dbname)
                connection = connect_fct(dsn)
            else:
                connect_kwargs = {
                    'user': user,
                    'password': password,
                    'database': dbname,
                }
                if port != '':
                    connect_kwargs.update(host=host, port=port, ssl=ssl)
                elif host.startswith('/'):
                    # If the hostname starts with /, it's probably a path
                    # to a UNIX socket. This is similar behaviour to psql
                    connect_kwargs['unix_sock'] = host
                else:
                    connect_kwargs.update(host=host, ssl=ssl)
                connection = connect_fct(**connect_kwargs)
        except Exception as e:
            message = u'Error establishing postgres connection: %s' % (
                str(e))
            service_check_tags = self._get_service_check_tags(
                host, port, tags)
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=service_check_tags,
                               message=message)
            raise

        self.dbs[key] = connection
        return connection
    def _get_custom_metrics(self, custom_metrics, key):
        """Validate and pre-process custom metric definitions, cached per key.

        When ``key`` is already present in ``self.custom_metrics`` the cached
        value is returned. Otherwise every definition must contain the
        required parameters, and each metric's collector name (RATE, GAUGE or
        MONOTONIC, case-insensitive) is replaced in place by the attribute of
        the same name on ``PostgreSql``.

        :raises ConfigurationError: when a required parameter is missing.
        :raises Exception: wrapping any error hit while processing the
            ``metrics`` mapping of a definition.
        """
        # Pre-processed cached custom_metrics
        if key in self.custom_metrics:
            return self.custom_metrics[key]

        # Otherwise pre-process custom metrics and verify definition
        required_parameters = ("descriptors", "metrics", "query", "relation")
        known_methods = ('RATE', 'GAUGE', 'MONOTONIC')

        for definition in custom_metrics:
            absent = next(
                (name for name in required_parameters
                 if name not in definition),
                None)
            if absent is not None:
                raise ConfigurationError(
                    'Missing {} parameter in custom metric'.format(absent))

            self.log.debug("Metric: {0}".format(definition))

            try:
                metric_specs = definition['metrics']
                for ref, (_, mtype) in iteritems(metric_specs):
                    cap_mtype = mtype.upper()
                    if cap_mtype not in known_methods:
                        raise ConfigurationError(
                            'Collector method {} is not known. '
                            'Known methods are RATE, GAUGE, MONOTONIC'.format(
                                cap_mtype))

                    # Swap the textual collector name for the matching
                    # attribute looked up on PostgreSql.
                    metric_specs[ref][1] = getattr(PostgreSql, cap_mtype)
                    self.log.debug("Method: %s" % (str(mtype)))
            except Exception as e:
                raise Exception(
                    'Error processing custom metric `{}`: {}'.format(
                        definition, e))

        self.custom_metrics[key] = custom_metrics
        return custom_metrics
    def get_instance_key(self, instance):
        """Build the topology instance key from the instance ``name``.

        Both ``name`` and ``location`` must be present in ``instance``; only
        ``name`` is used for the key.

        :raises ConfigurationError: when ``name`` or ``location`` is missing.
        """
        mandatory = (
            ("name", "Missing name in topology instance configuration."),
            ("location", "Missing location in topology instance configuration."),
        )
        for setting, complaint in mandatory:
            if setting not in instance:
                raise ConfigurationError(complaint)

        return TopologyInstance(self.INSTANCE_TYPE, instance["name"])
    def check(self, instance):
        """Collect topology for one instance inside a snapshot.

        The configuration is read through ``self._get_config``; ``url``,
        ``user`` and ``password`` must all be truthy. The outcome is reported
        as an OK or CRITICAL service check, and the snapshot is always
        stopped, even when collection failed.

        :raises ConfigurationError: when url, user or password is missing.
        """
        # Only url, user and password are needed here; the first and last
        # elements returned by _get_config are ignored.
        _, url, user, password, _ = self._get_config(instance)

        if not url or not user or not password:
            raise ConfigurationError(
                "Missing 'url', 'user' or 'password' in instance configuration."
            )

        try:
            self.start_snapshot()
            self._collect_topology()
            self.service_check(
                self.SERVICE_CHECK_NAME,
                AgentCheck.OK,
                message="OK",
                tags=self.tags)
        except Exception as e:
            failure = str(e)
            self.log.exception(failure)
            self.service_check(
                self.SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                message=failure,
                tags=self.tags)
        finally:
            self.stop_snapshot()
    def get_instance_key(self, instance):
        """Build the topology instance key from the instance ``url``.

        The key is created with ``with_snapshots=False``.

        :raises ConfigurationError: when ``url`` is missing from ``instance``.
        """
        if 'url' in instance:
            return TopologyInstance(
                self.INSTANCE_TYPE, instance["url"], with_snapshots=False)

        raise ConfigurationError('Missing url in configuration.')
    def get_instance_key(self, instance):
        """Build the topology instance key from the instance ``hostip``.

        :param instance: mapping holding the configuration of one instance.
        :returns: a ``TopologyInstance`` of ``self.INSTANCE_TYPE`` identified
            by the configured ``hostip`` value.
        :raises ConfigurationError: when ``hostip`` is missing.
        """
        if 'hostip' not in instance:
            # Name the key that is actually required; the previous message
            # pointed at a "url" setting that this method never reads.
            raise ConfigurationError(
                'Missing hostip in topology instance configuration.')

        return TopologyInstance(self.INSTANCE_TYPE, instance['hostip'])
    def check(self, instance):
        """Collect topology for one organization inside a snapshot.

        ``username`` and ``password`` must both be set in ``instance``. The
        optional ``organization_id`` is stored on the check object and used
        to tag the service check. The snapshot is always stopped, even when
        collection failed.

        :raises ConfigurationError: when username or password is missing.
        """
        username = instance.get("username", "")
        password = instance.get("password", "")
        self.organization_id = instance.get("organization_id", None)
        tags = instance.get("tags", [])  # read but not used in this method

        if not username or not password:
            raise ConfigurationError(
                "Missing 'username' or 'password' in instance configuration.")

        def organization_tags():
            # Built on demand so the tag reflects the id at reporting time.
            return ["organization_id:%s" % self.organization_id]

        try:
            self.start_snapshot()

            # NOTE: the explicit self._authenticate(username, password) call
            # is disabled here.

            self._collect_topology()

            self.service_check(
                self.SERVICE_CHECK_NAME,
                AgentCheck.OK,
                message="OK",
                tags=organization_tags())
        except Exception as e:
            failure = str(e)
            self.log.exception(failure)
            self.service_check(
                self.SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                message=failure,
                tags=organization_tags())
        finally:
            self.stop_snapshot()
Example #9
0
    def get_instance_key(self, instance):
        """Build the topology instance key from the network location of ``url``.

        :raises ConfigurationError: when ``url`` is missing from ``instance``.
        """
        if 'url' in instance:
            # Only the netloc part of the URL identifies the instance.
            network_location = urlparse(instance['url']).netloc
            return TopologyInstance(self.INSTANCE_TYPE, network_location)

        raise ConfigurationError(
            'Missing url in topology instance configuration.')
Example #10
0
    def handle_health_csv(self, filelocation, delimiter):
        """Read a health CSV file and submit one check state per valid row.

        The first row is the header and must contain the ``check_state_id``,
        ``name``, ``health`` and ``topology_element_identifier`` columns;
        ``message`` is optional. Every following row whose number of fields
        equals the number of header fields is passed to
        ``self.health.check_state``; all other rows are skipped with a
        warning.

        :param filelocation: path of the CSV file (opened as utf-8-sig).
        :param delimiter: field delimiter handed to ``csv.reader``.
        :raises ConfigurationError: when the file is empty or a mandatory
            header column is missing.
        """
        self.log.debug("Processing health CSV file %s." % filelocation)

        CHECK_STATE_ID_FIELD = 'check_state_id'
        NAME_FIELD = 'name'
        HEALTH_FIELD = 'health'
        TOPOLOGY_ELEMENT_IDENTIFIER_FIELD = 'topology_element_identifier'
        MESSAGE_FIELD = 'message'

        with codecs.open(filelocation, mode='r', encoding="utf-8-sig") as csvfile:
            reader = csv.reader(csvfile, delimiter=delimiter, quotechar='"')

            header_row = next(reader, None)
            if header_row is None:
                raise ConfigurationError("Health CSV file is empty.")
            self.log.debug("Detected health header: %s" % str(header_row))

            if len(header_row) == 1:
                self.log.warn("Detected one field in header, is the delimiter set properly?")
                self.log.warn("Detected health header: %s" % str(header_row))

            # mandatory fields
            for field in (CHECK_STATE_ID_FIELD, NAME_FIELD, HEALTH_FIELD, TOPOLOGY_ELEMENT_IDENTIFIER_FIELD):
                if field not in header_row:
                    raise ConfigurationError('CSV header %s not found in health csv.' % field)

            header_row_number_of_fields = len(header_row)

            for row in reader:
                # Compare the raw row length with the header. zip() stops at
                # the shorter input, so checking the zipped dict would let
                # rows with surplus fields through silently truncated.
                if len(row) != header_row_number_of_fields:
                    self.log.warn("Skipping row because number of fields do not match header row, got: %s" % row)
                    continue

                data = dict(zip(header_row, row))

                check_state_id = data.get(CHECK_STATE_ID_FIELD)
                name = data.get(NAME_FIELD)
                health = HealthType().convert(data.get(HEALTH_FIELD), None)
                topology_element_identifier = data.get(TOPOLOGY_ELEMENT_IDENTIFIER_FIELD)
                message = data.get(MESSAGE_FIELD, None)

                # An empty message cell is submitted as "no message".
                self.health.check_state(check_state_id, name, health, topology_element_identifier,
                                        message if message != "" else None)
    def get_instance_key(self, instance):
        """Build the topology instance key from the instance ``host``.

        :raises ConfigurationError: when ``host`` is missing from ``instance``.
        """
        if "host" in instance:
            return TopologyInstance(self.INSTANCE_TYPE, instance["host"])

        raise ConfigurationError(
            "Missing 'host' in instance configuration.")
Example #12
0
    def check(self, instance):
        """
        Integration logic

        Runs one cycle against the Zabbix API at ``instance['url']``:

        1. logs in and publishes every retrieved host as topology,
        2. retrieves the current problems and their events,
        3. rolls the events up per host, tracking the highest trigger
           priority seen for each host,
        4. emits one event per host (severity 0 and no triggers when the
           host has no rolled-up events),
        5. stops the snapshot and reports an OK service check.

        Any exception in these steps is logged and reported as a CRITICAL
        service check. In that case the snapshot is not stopped.

        :param instance: mapping holding the configuration of one instance.
        :raises ConfigurationError: when ``user``, ``password`` or ``url``
            is missing from ``instance``.
        """
        if 'user' not in instance:
            raise ConfigurationError('Missing API user in configuration.')
        if 'password' not in instance:
            raise ConfigurationError('Missing API password in configuration.')
        # Report a missing url as a configuration problem rather than letting
        # the lookup below fail with a bare KeyError.
        if 'url' not in instance:
            raise ConfigurationError('Missing API url in configuration.')

        stackstate_environment = instance.get('stackstate_environment',
                                              'Production')
        self.ssl_verify = instance.get('ssl_verify', True)

        url = instance['url']

        topology_instance = {"type": self.SERVICE_CHECK_NAME, "url": url}
        try:
            self.start_snapshot()
            self.check_connection(url)
            auth = self.login(url, instance['user'], instance['password'])

            hosts = {}  # key: host_id, value: ZabbixHost

            # Topology, get all hosts
            for zabbix_host in self.retrieve_hosts(url, auth):
                self.process_host_topology(topology_instance, zabbix_host,
                                           stackstate_environment)
                hosts[zabbix_host.host_id] = zabbix_host

            # Telemetry, get all problems.
            zabbix_problems = self.retrieve_problems(url, auth)

            event_ids = [problem.event_id for problem in zabbix_problems]
            # Events are only queried when there is at least one problem.
            zabbix_events = (self.retrieve_events(url, auth, event_ids)
                             if event_ids else [])

            rolled_up_events_per_host = {}  # host_id -> [ZabbixEvent]
            most_severe_severity_per_host = {}  # host_id -> severity int
            for zabbix_event in zabbix_events:
                for host_id in zabbix_event.host_ids:
                    priority = zabbix_event.trigger.priority
                    rolled_up_events_per_host.setdefault(
                        host_id, []).append(zabbix_event)
                    # Keep the highest priority seen so far for this host.
                    if (host_id not in most_severe_severity_per_host
                            or most_severe_severity_per_host[host_id] < priority):
                        most_severe_severity_per_host[host_id] = priority

            self.log.debug('rolled_up_events_per_host:' +
                           str(rolled_up_events_per_host))
            self.log.debug('most_severe_severity_per_host:' +
                           str(most_severe_severity_per_host))

            # iterate all hosts to send an event per host, either in OK/PROBLEM state
            for host_id, zabbix_host in hosts.items():
                severity = 0
                triggers = []

                if host_id in rolled_up_events_per_host:
                    triggers = [
                        event.trigger.description
                        for event in rolled_up_events_per_host[host_id]
                    ]
                    severity = most_severe_severity_per_host[host_id]

                # Title and text carry the same summary line.
                summary = "Zabbix event on host '{}': severity: {}".format(
                    zabbix_host.name, severity)

                self.event({
                    'timestamp': int(time.time()),
                    'msg_title': summary,
                    'msg_text': summary,
                    'source_type_name': self.INSTANCE_TYPE,
                    'host': self.hostname,
                    'tags': [
                        'host_id:%s' % host_id,
                        'host:%s' % zabbix_host.host,
                        'host_name:%s' % zabbix_host.name,
                        'severity:%s' % severity,
                        'triggers:%s' % triggers
                    ]
                })

            # Only reached when everything above succeeded.
            self.stop_snapshot()
            msg = "Zabbix instance detected at %s " % url
            tags = ["url:%s" % url]
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.OK,
                               tags=tags,
                               message=msg)
        except Exception as e:
            self.log.exception(str(e))
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               message=str(e))
Example #13
0
    def get_instance_key(self, instance):
        """Build the StackPack instance key from the instance ``url``.

        :raises ConfigurationError: when ``url`` is missing from ``instance``.
        """
        if 'url' in instance:
            return StackPackInstance(self.INSTANCE_TYPE, instance["url"])

        raise ConfigurationError('Missing API url in configuration.')
Example #14
0
    def check(self, instance):
        """Collect nginx metrics for one instance and submit them.

        Metrics come either from the status page (parsed as JSON or text,
        depending on the content type) or, when the Plus API is enabled, from
        every endpoint in ``PLUS_API_ENDPOINTS`` and
        ``PLUS_API_STREAM_ENDPOINTS``. Each metric row is submitted through
        the gauge, rate or monotonic_count method matching its type. A row
        that fails to submit is logged and skipped.

        :raises ConfigurationError: when ``nginx_status_url`` is missing.
        """
        if 'nginx_status_url' not in instance:
            raise ConfigurationError(
                'NginX instance missing "nginx_status_url" value.')

        tags = instance.get('tags', [])

        url, use_plus_api, plus_api_version = self._get_instance_params(
            instance)

        if use_plus_api:
            metrics = []
            self._perform_service_check(
                instance, '{}/{}'.format(url, plus_api_version))

            # These are all the endpoints we have to call to get the same data as we did with the old API
            # since we can't get everything in one place anymore.
            all_endpoints = chain(iteritems(PLUS_API_ENDPOINTS),
                                  iteritems(PLUS_API_STREAM_ENDPOINTS))
            for endpoint, nest in all_endpoints:
                response = self._get_plus_api_data(
                    url, plus_api_version, endpoint, nest)
                self.log.debug("Nginx Plus API version %s `response`: %s",
                               plus_api_version, response)
                metrics.extend(self.parse_json(response, tags))
        else:
            response, content_type, version = self._get_data(instance, url)
            # for unpaid versions
            self._set_version_metadata(version)

            self.log.debug("Nginx status `response`: %s", response)
            self.log.debug("Nginx status `content_type`: %s", content_type)

            is_json = content_type.startswith('application/json')
            parser = self.parse_json if is_json else self.parse_text
            metrics = parser(response, tags)

        submitters = {
            'gauge': self.gauge,
            'rate': self.rate,
            'count': self.monotonic_count
        }
        # Latest accepted/handled connection counts seen in VTS mode; both
        # are reset once the dropped-connections rate has been submitted.
        accepted = None
        handled = None

        for row in metrics:
            try:
                name, value, metric_tags, metric_type = row

                # Translate metrics received from VTS
                if instance.get('use_vts', False):
                    # Requests per second
                    if name == 'nginx.connections.handled':
                        handled = value
                    if name == 'nginx.connections.accepted':
                        accepted = value
                        self.rate('nginx.net.conn_opened_per_s', accepted,
                                  metric_tags)
                    if handled is not None and accepted is not None:
                        self.rate('nginx.net.conn_dropped_per_s',
                                  accepted - handled, metric_tags)
                        handled = None
                        accepted = None
                    if name == 'nginx.connections.requests':
                        self.rate('nginx.net.request_per_s', value,
                                  metric_tags)

                    # Metrics without a VTS mapping are not submitted.
                    name = VTS_METRIC_MAP.get(name)
                    if name is None:
                        continue

                if name in METRICS_SEND_AS_COUNT:
                    submitters['count'](name + "_count", value, metric_tags)
                submitters[metric_type](name, value, metric_tags)

                # for vts and plus versions
                if name == 'nginx.version':
                    self._set_version_metadata(value)

            except Exception as e:
                self.log.error('Could not submit metric: %s: %s', repr(row), e)
 def get_instance_key(self, instance):
     """Build the topology instance key from the instance ``organization_id``.

     The id is read from ``instance`` itself, the same mapping that is
     validated here, so the key does not depend on an attribute that may not
     have been assigned on the check object yet.

     :param instance: mapping holding the configuration of one instance.
     :returns: a ``TopologyInstance`` of ``self.INSTANCE_TYPE`` identified by
         the organization id converted to ``str``.
     :raises ConfigurationError: when ``organization_id`` is missing.
     """
     if "organization_id" not in instance:
         raise ConfigurationError(
             "Missing 'organization_id' in instance configuration.")
     return TopologyInstance(self.INSTANCE_TYPE,
                             str(instance["organization_id"]))
Example #16
0
    def check(self, instance):
        """Run a threaded topology and metrics collection, logging timings.

        After validating the configuration, worker threads are started, then
        the snapshot is opened, topology and metrics are collected and an OK
        service check is sent, with the run time of each step logged. Any
        failure is logged and reported as a CRITICAL service check. In every
        case the queue registered for ``self.host`` (if any) is joined, the
        snapshot is stopped and the total run time is logged.

        :raises ConfigurationError: when url, user or password is missing.
        """
        url, user, password = self._get_config(instance)

        if not url or not user or not password:
            raise ConfigurationError(
                "Missing 'url', 'user' or 'password' in instance configuration."
            )

        start_time = time.time()
        # Stays None when the run fails before the service check was sent.
        service_time = None
        try:
            self.start_threads(self.thread_count, self.thread_timeout)
            self.log.info("{0}: Checks started".format(self.host))

            self.start_snapshot()
            snapshot1_time = time.time()
            self.log.info("{0}: start_snapshot run time is {1}Seconds".format(
                self.host, snapshot1_time - start_time))

            self._collect_topology()
            topology_time = time.time()
            self.log.info(
                "{0}: _collect_topology run time is {1}Seconds".format(
                    self.host, topology_time - snapshot1_time))

            self._collect_metrics()
            metrics_time = time.time()
            self.log.info(
                "{0}: _collect_metrics run time is {1}Seconds".format(
                    self.host, metrics_time - topology_time))

            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.OK,
                               message="OK",
                               tags=self.tags)
            service_time = time.time()
            self.log.info("{0}: service_check run time is {1}Seconds".format(
                self.host, service_time - metrics_time))
        except Exception as e:
            failure = str(e)
            self.log.exception(failure)
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               message=failure,
                               tags=self.tags)
        finally:
            if self.host in self.queue.keys():
                # Waits/Blocks until self.queue is empty...
                self.queue[self.host].join()
            self.stop_snapshot()
            snapshot2_time = time.time()

            if service_time is None:
                self.log.info(
                    "{0}: Exception occured, measuments unavailable".format(
                        self.host))
            else:
                self.log.info(
                    "{0}: Drain queue & stop_snapshot run time is {1}Seconds".
                    format(self.host, snapshot2_time - service_time))

            stop_time = time.time()
            self.log.info("{0}: Check total run time is {1}Seconds".format(
                self.host, stop_time - start_time))