def get_connection(self, key, host, port, user, password, dbname, ssl, connect_fct, use_cached=True):
    "Get and memoize connections to instances"
    # Serve a previously opened connection when caching is allowed.
    if use_cached and key in self.dbs:
        return self.dbs[key]
    if host == "" or user == "":
        # Cannot build a connection without both a host and a user.
        if not host:
            raise CheckException("Please specify a Postgres host to connect to.")
        raise CheckException("Please specify a user to connect to Postgres as.")
    try:
        if host == 'localhost' and password == '':
            # Local passwordless connection: rely on the ident method.
            connection = connect_fct("user=%s dbname=%s" % (user, dbname))
        elif port != '':
            connection = connect_fct(host=host, port=port, user=user, password=password, database=dbname, ssl=ssl)
        elif host.startswith('/'):
            # A leading slash means a path to a UNIX socket
            # (same convention as psql).
            connection = connect_fct(unix_sock=host, user=user, password=password, database=dbname)
        else:
            connection = connect_fct(host=host, user=user, password=password, database=dbname, ssl=ssl)
    except Exception as e:
        message = u'Error establishing postgres connection: %s' % (str(e))
        service_check_tags = self._get_service_check_tags(host, port, dbname)
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=service_check_tags, message=message)
        raise
    # Memoize the freshly opened connection for later calls.
    self.dbs[key] = connection
    return connection
def check(self, instance):
    """Validate a static-topology instance config and process its CSV files.

    Emits a snapshot containing the components (and optionally relations)
    read from the configured CSV files.

    :raises CheckException: when a mandatory setting is missing or the
        instance type is not "csv"
    """
    if 'components_file' not in instance:
        raise CheckException('Static topology instance missing "components_file" value.')
    if 'relations_file' not in instance:
        raise CheckException('Static topology instance missing "relations_file" value.')
    if 'type' not in instance:
        raise CheckException('Static topology instance missing "type" value.')
    if 'delimiter' not in instance:
        # BUG FIX: instance['delimiter'] was read unchecked below and raised a
        # bare KeyError; report it the same way as the other mandatory settings.
        raise CheckException('Static topology instance missing "delimiter" value.')
    instance_tags = instance['tags'] if 'tags' in instance else []
    if instance['type'].lower() == "csv":
        component_file = instance['components_file']
        relation_file = instance['relations_file']
        delimiter = instance['delimiter']
        instance_key = {"type": "StaticTopology", "url": component_file}
        instance_tags.extend([
            "csv.component:%s" % component_file,
            "csv.relation:%s" % relation_file
        ])
        self.start_snapshot(instance_key)
        self.handle_component_csv(instance_key, component_file, delimiter, instance_tags)
        # Relations are optional: skip when no file is configured.
        if relation_file:
            self.handle_relation_csv(instance_key, relation_file, delimiter, instance_tags)
        self.stop_snapshot(instance_key)
    else:
        raise CheckException('Static topology instance only supports type CSV.')
def _get_connection(self, host, port, user, password, dbname):
    """Open a Postgres connection from the given settings.

    Uses the ident method for local passwordless connections, otherwise a
    TCP connection (with the port when one is given).

    :raises CheckException: when host or user is missing
    """
    if host != "" and user != "":
        if host == 'localhost' and password == '':
            # Use ident method
            connection = pg.connect("user=%s dbname=%s" % (user, dbname))
        elif port != '':
            connection = pg.connect(host=host, port=port, user=user, password=password, database=dbname, ssl=False)
        else:
            # BUG FIX: this branch referenced `ssl`, which is not a parameter
            # of this method and not defined anywhere in scope (NameError at
            # runtime); use ssl=False like the port-specific branch above.
            connection = pg.connect(host=host, user=user, password=password, database=dbname, ssl=False)
    else:
        if not host:
            raise CheckException("Please specify a Postgres host to connect to.")
        elif not user:
            raise CheckException("Please specify a user to connect to Postgres as.")
    return connection
def handle_component_csv(self, instance_key, filelocation, delimiter, instance_tags):
    """Read components from a CSV file and emit them to the topology.

    The header must contain the mandatory 'id', 'type' and 'name' columns.
    Every column of a row is copied into the component's data payload; the
    'labels', 'environments' and 'identifiers' columns are additionally
    split on ',' into lists (with sensible defaults when empty).

    :param instance_key: topology instance key the components belong to
    :param filelocation: path to the component CSV file
    :param delimiter: field delimiter used in the CSV file
    :param instance_tags: extra labels appended to every component
    :raises CheckException: when the file is empty or a mandatory header is missing
    """
    self.log.debug("Processing component CSV file %s." % filelocation)
    COMPONENT_ID_FIELD = 'id'
    COMPONENT_TYPE_FIELD = 'type'
    COMPONENT_NAME_FIELD = 'name'
    # utf-8-sig strips a BOM when the file was exported by e.g. Excel.
    with codecs.open(filelocation, mode='r', encoding="utf-8-sig") as csvfile:
        reader = csv.reader(csvfile, delimiter=delimiter, quotechar='"')
        header_row = next(reader, None)
        if header_row is None:
            raise CheckException("Component CSV file is empty.")
        self.log.debug("Detected component header: %s" % str(header_row))
        if len(header_row) == 1:
            # A single-column header usually means the configured delimiter
            # does not match the file.
            self.log.warn(
                "Detected one field in header, is the delimiter set properly?"
            )
            self.log.warn("Detected component header: %s" % str(header_row))
        # mandatory fields
        for field in (COMPONENT_ID_FIELD, COMPONENT_NAME_FIELD, COMPONENT_TYPE_FIELD):
            if field not in header_row:
                raise CheckException('CSV header %s not found in component csv.' % field)
        id_idx = header_row.index(COMPONENT_ID_FIELD)
        type_idx = header_row.index(COMPONENT_TYPE_FIELD)
        for row in reader:
            # Every header/value pair travels along as component data.
            data = dict(zip(header_row, row))
            # label processing
            labels = data.get('labels', "")
            labels = labels.split(',') if labels else []
            labels.extend(instance_tags)
            data['labels'] = labels
            # environment processing
            environments = data.get('environments', "Production")
            # environments column may be in the row but may be empty/unspecified for that row, defaulting to Production
            environments = environments.split(',') if environments else [
                "Production"
            ]
            data['environments'] = environments
            # identifiers processing
            identifiers = data.get('identifiers', "")
            # identifiers column may be in the row but may be empty/unspecified for that row, defaulting
            identifiers = identifiers.split(',') if identifiers else []
            data['identifiers'] = identifiers
            self.component(instance_key=instance_key, id=row[id_idx], type={"name": row[type_idx]}, data=data)
def get_connection(self, key, host, port, user, password, dbname, ssl, use_cached=True):
    "Get and memoize connections to instances"
    # Hand back the cached connection when allowed.
    if use_cached and key in self.dbs:
        return self.dbs[key]
    if host == "" or user == "":
        if not host:
            raise CheckException("Please specify a Postgres host to connect to.")
        raise CheckException("Please specify a user to connect to Postgres as.")
    try:
        if host == 'localhost' and password == '':
            # Local passwordless connection: use the ident method.
            connection = pg.connect("user=%s dbname=%s" % (user, dbname))
        elif port != '':
            connection = pg.connect(host=host, port=port, user=user, password=password, database=dbname, ssl=ssl)
        else:
            connection = pg.connect(host=host, user=user, password=password, database=dbname, ssl=ssl)
    except Exception as e:
        message = u'Error establishing postgres connection: %s' % (str(e))
        service_check_tags = self._get_service_check_tags(host, port, dbname)
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=service_check_tags, message=message)
        raise
    self.dbs[key] = connection
    return connection
def _get_custom_metrics(self, custom_metrics, key):
    """Validate custom metric definitions, resolve their collector methods
    onto PostgreSql attributes, and cache the processed list per key."""
    # Pre-processed definitions are cached per instance key.
    if key in self.custom_metrics:
        return self.custom_metrics[key]
    required_parameters = ("descriptors", "metrics", "query", "relation")
    for metric_def in custom_metrics:
        # Reject any definition that lacks a mandatory parameter.
        for param in required_parameters:
            if param not in metric_def:
                raise CheckException("Missing {0} parameter in custom metric".format(param))
        self.log.debug("Metric: {0}".format(metric_def))
        try:
            for ref, (_, mtype) in metric_def['metrics'].iteritems():
                cap_mtype = mtype.upper()
                if cap_mtype not in ('RATE', 'GAUGE', 'MONOTONIC'):
                    raise CheckException("Collector method {0} is not known. Known methods are RATE, GAUGE, MONOTONIC".format(cap_mtype))
                # Replace the method name with the actual collector callable.
                metric_def['metrics'][ref][1] = getattr(PostgreSql, cap_mtype)
                self.log.debug("Method: %s" % (str(mtype)))
        except Exception as e:
            # NOTE: the unknown-method CheckException above is also caught
            # here and re-wrapped, matching the original behaviour.
            raise CheckException("Error processing custom metric '{}': {}".format(metric_def, e))
    self.custom_metrics[key] = custom_metrics
    return custom_metrics
def _get_connect_kwargs(self, host, port, user, password, database_url): """ Get the params to pass to psycopg2.connect() based on passed-in vals from yaml settings file """ if database_url: return {'dsn': database_url} if not host: raise CheckException( "Please specify a PgBouncer host to connect to.") if not user: raise CheckException( "Please specify a user to connect to PgBouncer as.") if host in ('localhost', '127.0.0.1') and password == '': return { # Use ident method 'dsn': "user={} dbname={}".format(user, self.DB_NAME) } if port: return {'host': host, 'user': user, 'password': password, 'database': self.DB_NAME, 'port': port} return {'host': host, 'user': user, 'password': password, 'database': self.DB_NAME}
def _get_connection(self, key, host, port, user, password, use_cached=True):
    """Open (or reuse) an autocommit connection to the pgbouncer admin db."""
    if use_cached and key in self.dbs:
        return self.dbs[key]
    if host == "" or user == "":
        if not host:
            raise CheckException("Please specify a PgBouncer host to connect to.")
        raise CheckException("Please specify a user to connect to PgBouncer as.")
    try:
        if host == 'localhost' and password == '':
            # Local passwordless connection: use the ident method.
            connection = pg.connect("user=%s dbname=%s" % (user, self.DB_NAME))
        elif port != '':
            connection = pg.connect(host=host, port=port, user=user, password=password, database=self.DB_NAME)
        else:
            connection = pg.connect(host=host, user=user, password=password, database=self.DB_NAME)
        # pgbouncer admin commands cannot run inside a transaction.
        connection.set_isolation_level(pg.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        self.log.debug('pgbouncer status: %s' % AgentCheck.OK)
    except Exception:
        message = u'Cannot establish connection to pgbouncer://%s:%s/%s' % (host, port, self.DB_NAME)
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=self._get_service_checks_tags(host, port), message=message)
        self.log.debug('pgbouncer status: %s' % AgentCheck.CRITICAL)
        raise
    self.dbs[key] = connection
    return connection
def handle_relation_csv(self, instance_key, filelocation, delimiter, instance_tags):
    """Read relations from a CSV file and emit them to the topology.

    The header must contain the mandatory 'sourceid', 'targetid' and 'type'
    columns; every column of a row is copied into the relation's data payload.

    :param instance_key: topology instance key the relations belong to
    :param filelocation: path to the relation CSV file
    :param delimiter: field delimiter used in the CSV file
    :param instance_tags: extra instance tags (currently unused for relations)
    :raises CheckException: when the file is empty or a mandatory header is missing
    """
    self.log.debug("Processing relation CSV file %s." % filelocation)
    RELATION_SOURCE_ID_FIELD = 'sourceid'
    RELATION_TARGET_ID_FIELD = 'targetid'
    RELATION_TYPE_FIELD = 'type'
    # utf-8-sig strips a BOM when the file was exported by e.g. Excel.
    with codecs.open(filelocation, mode='r', encoding="utf-8-sig") as csvfile:
        # BUG FIX: quotechar was '|', unlike the component CSV which uses the
        # conventional double quote; conventionally quoted relation fields
        # were therefore parsed with their quotes left in place.
        reader = csv.reader(csvfile, delimiter=delimiter, quotechar='"')
        header_row = next(reader, None)
        if header_row is None:
            raise CheckException("Relation CSV file is empty.")
        self.log.debug("Detected relation header: %s" % str(header_row))
        # mandatory fields
        for field in (RELATION_SOURCE_ID_FIELD, RELATION_TARGET_ID_FIELD, RELATION_TYPE_FIELD):
            if field not in header_row:
                raise CheckException('CSV header %s not found in relation csv.' % field)
        source_id_idx = header_row.index(RELATION_SOURCE_ID_FIELD)
        target_id_idx = header_row.index(RELATION_TARGET_ID_FIELD)
        type_idx = header_row.index(RELATION_TYPE_FIELD)
        for row in reader:
            # Every header/value pair travels along as relation data.
            data = dict(zip(header_row, row))
            self.relation(instance_key=instance_key, source_id=row[source_id_idx], target_id=row[target_id_idx], type={"name": row[type_idx]}, data=data)
def get_connection(self, key, host, port, user, password, dbname, use_cached=True):
    "Get and memoize connections to instances"
    if use_cached and key in self.dbs:
        return self.dbs[key]
    if host == "" or user == "":
        if not host:
            raise CheckException("Please specify a Postgres host to connect to.")
        raise CheckException("Please specify a user to connect to Postgres as.")
    # Import lazily so the check loads even without the driver installed.
    try:
        import psycopg2 as pg
    except ImportError:
        raise ImportError("psycopg2 library cannot be imported. Please check the installation instruction on the Datadog Website.")
    if host == 'localhost' and password == '':
        # Local passwordless connection: use the ident method.
        connection = pg.connect("user=%s dbname=%s" % (user, dbname))
    elif port != '':
        connection = pg.connect(host=host, port=port, user=user, password=password, database=dbname)
    else:
        connection = pg.connect(host=host, user=user, password=password, database=dbname)
    try:
        connection.autocommit = True
    except AttributeError:
        # connection.autocommit was added in version 2.4.2
        from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
        connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    self.dbs[key] = connection
    return connection
def get_connection(self, key, host, port, user, password, dbname, use_cached=True):
    "Get and memoize connections to instances"
    if use_cached and key in self.dbs:
        return self.dbs[key]
    if host == "" or user == "":
        if not host:
            raise CheckException("Please specify a Postgres host to connect to.")
        raise CheckException("Please specify a user to connect to Postgres as.")
    try:
        # Build the service check tags up front so the except branch can use them.
        service_check_tags = ["host:%s" % host, "port:%s" % port]
        if dbname:
            service_check_tags.append("db:%s" % dbname)
        if host == 'localhost' and password == '':
            # Local passwordless connection: use the ident method.
            connection = pg.connect("user=%s dbname=%s" % (user, dbname))
        elif port != '':
            connection = pg.connect(host=host, port=port, user=user, password=password, database=dbname)
        else:
            connection = pg.connect(host=host, user=user, password=password, database=dbname)
        status = AgentCheck.OK
        self.service_check('postgres.can_connect', status, tags=service_check_tags)
        self.log.info('pg status: %s' % status)
    except Exception:
        status = AgentCheck.CRITICAL
        self.service_check('postgres.can_connect', status, tags=service_check_tags)
        self.log.info('pg status: %s' % status)
        raise
    self.dbs[key] = connection
    return connection
def check(self, instance):
    """Run one Splunk topology collection cycle for the configured instance.

    Instances are memoized per url and polled at most once per interval.
    A cycle authenticates, refreshes the saved-search list, dispatches the
    searches in parallel chunks and, on full success, records the poll
    timestamp. Failures emit a critical service check and (unless saved
    search errors are ignored) clear the topology before re-raising.
    """
    if 'url' not in instance:
        raise CheckException('Splunk topology instance missing "url" value.')
    if instance["url"] not in self.instance_data:
        self.instance_data[instance["url"]] = Instance(instance, self.init_config)
    instance = self.instance_data[instance["url"]]
    current_time_epoch_seconds = self._current_time_seconds()
    instance_key = instance.instance_key
    # Respect the per-instance polling interval.
    if not instance.should_poll(current_time_epoch_seconds):
        return
    if instance.snapshot:
        self.start_snapshot(instance_key)
    try:
        self._auth_session(instance)
        saved_searches = self._saved_searches(instance)
        instance.saved_searches.update_searches(self.log, saved_searches)
        all_success = True
        # Dispatch the saved searches in chunks of `saved_searches_parallel`.
        for saved_searches in chunks(instance.saved_searches.searches, instance.saved_searches_parallel):
            all_success &= self._dispatch_and_await_search(instance, saved_searches)
        # If everything was successful, update the timestamp
        if all_success:
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK)
            instance.last_successful_poll_epoch_seconds = current_time_epoch_seconds
        if instance.snapshot:
            self.stop_snapshot(instance_key)
    except Exception as e:
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=instance.tags, message=str(e))
        self.log.exception("Splunk topology exception: %s" % str(e))
        if not instance.splunk_ignore_saved_search_errors:
            self._clear_topology(instance_key, clear_in_snapshot=True)
        # Python 2 three-expression raise: re-raise with the original traceback.
        raise CheckException("Splunk topology failed with message: %s" % e), None, sys.exc_info()[2]
def check(self, instance):
    """Run the Splunk metric/event check for one configured instance.

    Instances are memoized per url. After the initial waiting time has
    expired, the saved searches are dispatched in parallel chunks; when no
    critical service checks were produced, an OK service check is emitted.

    :raises CheckException: when "url" is missing, when no saved search could
        be executed, or when collection fails
    """
    if 'url' not in instance:
        raise CheckException('Splunk metric/event instance missing "url" value.')
    current_time = self._current_time_seconds()
    url = instance["url"]
    if url not in self.instance_data:
        self.instance_data[url] = self.get_instance(instance, current_time)
    instance = self.instance_data[url]
    if not instance.initial_time_done(current_time):
        self.log.debug("Skipping splunk metric/event instance %s, waiting for initial time to expire" % url)
        return
    self.load_status()
    instance.update_status(current_time, self.status)
    try:
        self._auth_session(instance)
        saved_searches = self._saved_searches(instance)
        instance.saved_searches.update_searches(self.log, saved_searches)
        executed_searches = False
        # Dispatch the saved searches in chunks of `saved_searches_parallel`.
        for saved_searches in chunks(instance.saved_searches.searches, instance.saved_searches_parallel):
            executed_searches |= self._dispatch_and_await_search(instance, saved_searches)
        if len(instance.saved_searches.searches) != 0 and not executed_searches:
            raise CheckException("No saved search was successfully executed.")
        # If no service checks were produced, everything is ok.
        # BUG FIX: this used `is 0`, which compares int identity (a CPython
        # implementation detail); use an equality comparison instead.
        if len(self.service_checks) == 0:
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK)
    except Exception as e:
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=instance.tags, message=str(e))
        self.log.exception("Splunk event exception: %s" % str(e))
        raise CheckException("Error getting Splunk data, please check your configuration. Message: " + str(e))
def _get_json(self, url, timeout, verify):
    """GET *url* and return the decoded JSON body, emitting (at most once
    per run) a service check describing the outcome; raise on failure."""
    tags = ["url:%s" % url]
    msg = None
    status = None
    try:
        r = requests.get(url, timeout=timeout, verify=verify)
        if r.status_code == 200:
            status = AgentCheck.OK
            msg = "Mesos master instance detected at %s " % url
        else:
            status = AgentCheck.CRITICAL
            msg = "Got %s when hitting %s" % (r.status_code, url)
    except requests.exceptions.Timeout:
        # If there's a timeout
        status = AgentCheck.CRITICAL
        msg = "%s seconds timeout when hitting %s" % (timeout, url)
    except Exception as e:
        status = AgentCheck.CRITICAL
        msg = str(e)
    finally:
        # Only the first request of a run reports the service check.
        if self.service_check_needed:
            self.service_check(self.SERVICE_CHECK_NAME, status, tags=tags, message=msg)
            self.service_check_needed = False
        if status is AgentCheck.CRITICAL:
            raise CheckException("Cannot connect to mesos, please check your configuration.")
    if r.encoding is None:
        r.encoding = 'UTF8'
    return r.json()
def _mocked_interval_search(*args, **kwargs):
    """Test double for the interval search: optionally fails, verifies the
    requested search id and returns the canned empty result set."""
    if test_data["throw"]:
        raise CheckException("Is broke it")
    search_id = args[0]
    self.assertTrue(search_id in test_data["expected_searches"])
    return [json.loads(Fixtures.read_file("empty.json", sdk_dir=FIXTURE_DIR))]
def check(self, instance):
    """Collect Gitlab's Prometheus metrics, then probe its health endpoints."""
    # --- Metrics collection ---
    endpoint = instance.get('prometheus_endpoint')
    if endpoint is None:
        raise CheckException("Unable to find prometheus_endpoint in config file.")
    # Histogram buckets are shipped unless explicitly disabled.
    send_buckets = _is_affirmative(instance.get('send_histograms_buckets', True))
    try:
        self.process(endpoint, send_histograms_buckets=send_buckets, instance=instance)
        self.service_check(self.PROMETHEUS_SERVICE_CHECK_NAME, PrometheusCheck.OK)
    except requests.exceptions.ConnectionError as e:
        # Unable to connect to the metrics endpoint
        self.service_check(self.PROMETHEUS_SERVICE_CHECK_NAME, PrometheusCheck.CRITICAL, message="Unable to retrieve Prometheus metrics from endpoint %s: %s" % (endpoint, e.message))
    # --- Service checks against Gitlab's health endpoints ---
    for check_type in self.ALLOWED_SERVICE_CHECKS:
        self._check_health_endpoint(instance, check_type)
def _get_json(self, url, timeout, auth=None, verify=True):
    """GET *url* and return its JSON payload; on any failure emit a
    critical service check and raise CheckException."""
    tags = ["url:%s" % url]
    msg = None
    status = None
    resp = None
    try:
        resp = requests.get(url, timeout=timeout, auth=auth, verify=verify)
        if resp.status_code != 200:
            status = AgentCheck.CRITICAL
            msg = "Got %s when hitting %s" % (resp.status_code, url)
    except requests.exceptions.Timeout:
        # If there's a timeout
        status = AgentCheck.CRITICAL
        msg = "%s seconds timeout when hitting %s" % (timeout, url)
    except Exception as e:
        status = AgentCheck.CRITICAL
        msg = str(e)
    finally:
        if status is AgentCheck.CRITICAL:
            self.service_check(self.SERVICE_CHECK_NAME, status, tags=tags, message=msg)
            raise CheckException("Cannot connect to ServiceNow CMDB, please check your configuration.")
    if resp.encoding is None:
        resp.encoding = 'UTF8'
    return resp.json()
def _search_chunk(self, saved_search, search_id, offset, count):
    """
    Retrieve one page of results of an already running splunk search,
    identified by the given search id.

    :param saved_search: current SavedSearch being processed
    :param search_id: perform a search operation on the search id
    :param offset: starting offset, begin is 0, to start retrieving from
    :param count: the maximum number of elements expecting to be returned by the API call
    :return: raw json response from splunk
    """
    search_path = '/servicesNS/-/-/search/jobs/%s/results?output_mode=json&offset=%s&count=%s' % (search_id, offset, count)
    response = self._do_get(search_path, saved_search.request_timeout_seconds, self.instance_config.verify_ssl_certificate)
    # Splunk answers 204 (No Content) while the job is still running;
    # poll until data shows up or the retry budget is exhausted.
    retries_left = saved_search.search_max_retry_count
    while response.status_code == 204:
        if retries_left == 0:
            raise CheckException("maximum retries reached for " + self.instance_config.base_url + " with search id " + search_id)
        retries_left -= 1
        time.sleep(saved_search.search_seconds_between_retries)
        response = self._do_get(search_path, saved_search.request_timeout_seconds, self.instance_config.verify_ssl_certificate)
    return response.json()
def _get_json(self, url, timeout):
    """GET *url* and return the decoded JSON body, emitting (at most once
    per run) a service check describing the outcome.

    :raises CheckException: when the request fails or returns a non-200 status
    """
    tags = ["url:%s" % url]
    msg = None
    status = None
    # NOTE: an unused md5-based `aggregation_key` was removed here.
    try:
        r = requests.get(url, timeout=timeout)
        if r.status_code != 200:
            status = AgentCheck.CRITICAL
            msg = "Got %s when hitting %s" % (r.status_code, url)
        else:
            status = AgentCheck.OK
            msg = "Mesos master instance detected at %s " % url
    except requests.exceptions.Timeout:
        # If there's a timeout
        msg = "%s seconds timeout when hitting %s" % (timeout, url)
        status = AgentCheck.CRITICAL
    except Exception as e:
        msg = str(e)
        status = AgentCheck.CRITICAL
    finally:
        # Only the first request of a run reports the service check.
        if self.service_check_needed:
            self.service_check(self.SERVICE_CHECK_NAME, status, tags=tags, message=msg)
            self.service_check_needed = False
        if status is AgentCheck.CRITICAL:
            # BUG FIX: the critical service check used to be sent a second
            # time here even though it was already emitted above; report it
            # once and just raise.
            raise CheckException("Cannot connect to mesos, please check your configuration.")
    return r.json()
def check(self, instance):
    """Run the kube-state check: scrape the kube_state_url Prometheus
    endpoint and forward job success/failure counters as monotonic counts.

    :raises CheckException: when kube_state_url is missing from the instance
    """
    endpoint = instance.get('kube_state_url')
    if endpoint is None:
        raise CheckException("Unable to find kube_state_url in config file.")
    if 'labels_mapper' in instance:
        if isinstance(instance['labels_mapper'], dict):
            self.labels_mapper = instance['labels_mapper']
        else:
            # BUG FIX: corrected the typo "dictionnary" in the warning message.
            self.log.warning("labels_mapper should be a dictionary")
    # By default we send the buckets; only an explicit "false" disables them.
    # (Equivalent to the previous multi-branch logic: any value whose string
    # form is not "false" — including None — keeps buckets enabled.)
    send_buckets = str(instance.get('send_histograms_buckets', True)).lower() != 'false'
    # Job counters are monotonic: they increase at every run of the job.
    # We want to send the delta via the `monotonic_count` method.
    self.job_succeeded_count = defaultdict(int)
    self.job_failed_count = defaultdict(int)
    self.process(endpoint, send_histograms_buckets=send_buckets, instance=instance)
    for job_tags, job_count in self.job_succeeded_count.iteritems():
        self.monotonic_count(self.NAMESPACE + '.job.succeeded', job_count, list(job_tags))
    for job_tags, job_count in self.job_failed_count.iteritems():
        self.monotonic_count(self.NAMESPACE + '.job.failed', job_count, list(job_tags))
def _get_json(self, url, timeout, verify, tags=None):
    """GET *url*, emit (at most once per run) a service check describing the
    outcome, and return the decoded JSON body; raise on failure."""
    tags = (tags + ["url:%s" % url]) if tags else ["url:%s" % url]
    msg = None
    status = None
    try:
        r = requests.get(url, timeout=timeout, verify=verify)
        if r.status_code == 200:
            status = AgentCheck.OK
            msg = "Mesos master instance detected at %s " % url
        else:
            status = AgentCheck.CRITICAL
            msg = "Got %s when hitting %s" % (r.status_code, url)
    except requests.exceptions.Timeout:
        # If there's a timeout
        status = AgentCheck.CRITICAL
        msg = "%s seconds timeout when hitting %s" % (timeout, url)
    except Exception as e:
        status = AgentCheck.CRITICAL
        msg = str(e)
    finally:
        self.log.debug('Request to url : {0}, timeout: {1}, message: {2}'.format(url, timeout, msg))
        # Only the first request of a run reports the service check.
        if self.service_check_needed:
            self.service_check(self.SERVICE_CHECK_NAME, status, tags=tags, message=msg)
            self.service_check_needed = False
        if status is AgentCheck.CRITICAL:
            raise CheckException('Cannot connect to mesos. Error: {0}'.format(msg))
    if r.encoding is None:
        r.encoding = 'UTF8'
    return r.json()
def _mocked_dispatch_saved_search_dispatch(*args, **kwargs):
    """Test double for saved-search dispatch: optionally fails, verifies the
    requested earliest_time and hands back the canned sid."""
    if test_data["throw"]:
        raise CheckException("Is broke it")
    requested_earliest = args[2]['dispatch.earliest_time']
    if test_data["earliest_time"] != "":
        self.assertEquals(requested_earliest, test_data["earliest_time"])
    return test_data["sid"]
def line_to_interval(line):
    """Parse a '<start> <end>' uptime line into an UptimeInterval,
    rejecting ranges whose start lies after their end."""
    parts = line.rstrip('\n').split(' ')
    interval = UptimeInterval(int(parts[0]), int(parts[1]))
    if interval.start > interval.end:
        raise CheckException("Invalid interval: first value ({0}) was greater than second ({1})".format(interval.start, interval.end))
    return interval
def take_required_field(field, obj):
    """
    Return the value of *field* from *obj*, removing the field from the
    object in place. Raises CheckException when the field is absent.
    """
    if field not in obj:
        raise CheckException("Missing '%s' field in result data" % field)
    return obj.pop(field)
def get_type(self, type_field, ucmdb_element):
    """Resolve the element type from a ucmdb element: prefer the top-level
    field, then fall back to the nested 'data' mapping."""
    if type_field in ucmdb_element:
        return ucmdb_element[type_field]
    if type_field in ucmdb_element['data']:
        return ucmdb_element['data'][type_field]
    raise CheckException("Unable to resolve element type from ucmdb data %s" % (str(ucmdb_element)))
def _process_customer_metrics(self, custom_metrics):
    """Validate custom metric definitions in place, mapping each declared
    collector method name onto the matching PostgreSql attribute."""
    required_parameters = ("descriptors", "metrics", "query", "relation")
    for metric_def in custom_metrics:
        # Reject any definition that lacks a mandatory parameter.
        for param in required_parameters:
            if param not in metric_def:
                raise CheckException("Missing {0} parameter in custom metric".format(param))
        self.log.debug("Metric: {0}".format(metric_def))
        for ref, spec in metric_def['metrics'].items():
            method_name = spec[1].upper()
            if method_name not in ['RATE', 'GAUGE', 'MONOTONIC']:
                raise CheckException("Collector method {0} is not known.Known methods are RATE,GAUGE,MONOTONIC".format(method_name))
            # Replace the method name with the actual collector callable.
            metric_def['metrics'][ref][1] = getattr(PostgreSql, method_name)
            # NOTE: spec[1] was just reassigned above, so this logs the
            # resolved callable — matching the original behaviour.
            self.log.debug("Method: %s" % (str(spec[1])))
def _check_health_endpoint(self, instance, check_type, tags):
    """Hit one of Gitlab's health endpoints (<gitlab_url>/-/<check_type>) and
    emit the matching gitlab.<check_type> service check.

    :param instance: the check instance configuration
    :param check_type: one of ALLOWED_SERVICE_CHECKS
    :param tags: extra tags appended to the service check tags
    :raises CheckException: when check_type is not an allowed endpoint
    """
    if check_type not in self.ALLOWED_SERVICE_CHECKS:
        raise CheckException("Health endpoint %s is not a valid endpoint" % check_type)
    url = instance.get('gitlab_url')
    if url is None:
        # Simply ignore this service check if not configured
        self.log.debug("gitlab_url not configured, service check %s skipped" % check_type)
        return
    service_check_tags = self._service_check_tags(url)
    service_check_tags.extend(tags)
    verify_ssl = self._verify_ssl(instance)
    # Timeout settings
    timeouts = (int(instance.get('connect_timeout', GitlabCheck.DEFAULT_CONNECT_TIMEOUT)),
                int(instance.get('receive_timeout', GitlabCheck.DEFAULT_RECEIVE_TIMEOUT)))
    # Auth settings
    auth = None
    if 'gitlab_user' in instance and 'gitlab_password' in instance:
        auth = (instance['gitlab_user'], instance['gitlab_password'])
    # These define which endpoint is hit and which type of check is actually performed
    # TODO: parse errors and report for single sub-service failure?
    service_check_name = "gitlab.%s" % check_type
    check_url = "%s/-/%s" % (url, check_type)
    try:
        self.log.debug('checking %s against %s' % (check_type, check_url))
        r = requests.get(check_url, auth=auth, verify=verify_ssl, timeout=timeouts, headers=headers(self.agentConfig))
        if r.status_code != 200:
            self.service_check(service_check_name, PrometheusCheck.CRITICAL, message="Got %s when hitting %s" % (r.status_code, check_url), tags=service_check_tags)
            raise Exception("Http status code {0} on check_url {1}".format(r.status_code, check_url))
        else:
            r.raise_for_status()
    except requests.exceptions.Timeout:
        # If there's a timeout
        self.service_check(service_check_name, PrometheusCheck.CRITICAL, message="Timeout when hitting %s" % check_url, tags=service_check_tags)
        raise
    except Exception as e:
        # BUG FIX: this message string literal was broken across a physical
        # line (a syntax error in the source); rejoined into one line.
        self.service_check(service_check_name, PrometheusCheck.CRITICAL, message="Error hitting %s. Error: %s" % (check_url, e.message), tags=service_check_tags)
        raise
    else:
        self.service_check(service_check_name, PrometheusCheck.OK, tags=service_check_tags)
    self.log.debug("gitlab check %s succeeded" % check_type)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialise the runner check and build the Prometheus->Datadog metric
    mapping from the whitelist in init_config."""
    super(GitlabRunnerCheck, self).__init__(name, init_config, agentConfig, instances)
    # Mapping from Prometheus metrics names to Datadog ones.
    # For now it's a 1:1 mapping.
    # TODO: mark some metrics as rate
    allowed_metrics = init_config.get('allowed_metrics')
    if not allowed_metrics:
        raise CheckException("At least one metric must be whitelisted in `allowed_metrics`.")
    self.metrics_mapper = {metric: metric for metric in allowed_metrics}
def _mocked_dispatch_saved_search_do_post(*args, **kwargs):
    """Test double for the dispatch POST: optionally fails, verifies the
    requested earliest_time and returns a response stub carrying the sid."""
    if test_data["throw"]:
        raise CheckException("Is broke it")

    class StubResponse(object):
        # Mimics the only part of the HTTP response the caller reads.
        def json(self):
            return {"sid": test_data["sid"]}

    requested_earliest = args[2]['dispatch.earliest_time']
    if test_data["earliest_time"] != "":
        self.assertEquals(requested_earliest, test_data["earliest_time"])
    return StubResponse()
def check(self, instance):
    """Run one Splunk topology collection cycle for the configured instance.

    Instances are memoized per url and polled at most once per interval.
    The whole cycle is wrapped in a topology snapshot; any failure clears
    the topology collected in this snapshot, emits a critical service check
    and raises a CheckException.
    """
    if 'url' not in instance:
        raise CheckException('Splunk topology instance missing "url" value.')
    if instance["url"] not in self.instance_data:
        self.instance_data[instance["url"]] = Instance(instance, self.init_config)
    instance = self.instance_data[instance["url"]]
    current_time_epoch_seconds = self._current_time_seconds()
    instance_key = instance.instance_key
    # Respect the per-instance polling interval.
    if not instance.should_poll(current_time_epoch_seconds):
        return
    self.start_snapshot(instance_key)
    try:
        saved_searches = self._saved_searches(instance.instance_config)
        instance.saved_searches.update_searches(self.log, saved_searches)
        # Dispatch the saved searches in chunks of `saved_searches_parallel`.
        for saved_searches in chunks(instance.saved_searches.searches, instance.saved_searches_parallel):
            self._dispatch_and_await_search(instance, saved_searches)
        # If everything was successful, update the timestamp
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK)
        instance.last_successful_poll_epoch_seconds = current_time_epoch_seconds
        self.stop_snapshot(instance_key)
    except Exception as e:
        # Drop everything gathered in this snapshot before reporting failure.
        self._clear_topology(instance_key, clear_in_snapshot=True)
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=instance.tags, message=str(e))
        self.log.exception("Splunk topology exception: %s" % str(e))
        raise CheckException("Cannot connect to Splunk, please check your configuration. Message: " + str(e))