from atexit import register, unregister  # assumed source of register/unregister
from datetime import timedelta

from airflow.exceptions import AirflowException
from airflow.hooks.base_hook import BaseHook
from airflow.utils.log.logging_mixin import LoggingMixin
from datadog import initialize
from datadog.threadstats import ThreadStats


class DatadogStatsLogger(LoggingMixin):
    def __init__(self, datadog_conn_id='datadog_default'):
        super().__init__()
        conn = BaseHook.get_connection(datadog_conn_id)
        self.api_key = conn.extra_dejson.get('api_key', None)
        self.app_key = conn.extra_dejson.get('app_key', None)
        self.source_type_name = conn.extra_dejson.get('source_type_name', None)
        # If the host is populated, it is used as the hostname for all
        # metric submissions.
        self.host = conn.host
        if self.api_key is None:
            raise AirflowException('api_key must be specified in the '
                                   'Datadog connection details')
        self.log.info('Setting up api keys for Datadog')
        self.stats = None
        initialize(api_key=self.api_key, app_key=self.app_key)

    def incr(self, stat, count=1, rate=1, tags=None):
        self.log.info('datadog incr: {} {} {} {}'.format(stat, count, rate, tags))
        self.stats.increment(stat, value=count, sample_rate=rate,
                             tags=self._format_tags(tags))

    def decr(self, stat, count=1, rate=1, tags=None):
        self.log.info('datadog decr: {} {} {} {}'.format(stat, count, rate, tags))
        self.stats.decrement(stat, value=count, sample_rate=rate,
                             tags=self._format_tags(tags))

    def gauge(self, stat, value, rate=1, delta=False, tags=None):
        self.log.info('datadog gauge: {} {} {} {} {}'.format(
            stat, value, rate, delta, tags))
        if delta:
            self.log.warning('Deltas are unsupported in Datadog')
        self.stats.gauge(stat, value, sample_rate=rate,
                         tags=self._format_tags(tags))

    def timing(self, stat, delta, rate=1, tags=None):
        self.log.info('datadog timing: {} {} {}'.format(stat, delta, tags))
        if isinstance(delta, timedelta):
            # Datadog expects timings in milliseconds.
            delta = delta.total_seconds() * 1000.
        self.stats.timing(stat, delta, sample_rate=rate,
                          tags=self._format_tags(tags))

    @classmethod
    def _format_tags(cls, tags):
        if not tags:
            return None
        return ['{}:{}'.format(k, v) for k, v in tags.items()]

    def start(self):
        self.stats = ThreadStats(namespace='airflow')
        self.stats.start()
        register(self.stop)

    def stop(self):
        unregister(self.stop)
        self.stats.stop()
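For orientation, a minimal usage sketch of the logger above. The module path plugins.datadog_stats and the metric names are illustrative assumptions, not part of the original; the 'datadog_default' connection must carry the API keys in its extras.

from plugins.datadog_stats import DatadogStatsLogger  # hypothetical module path

stats = DatadogStatsLogger(datadog_conn_id='datadog_default')
stats.start()  # creates the namespaced ThreadStats client, registers stop() at exit
stats.incr('dag_run.started', tags={'dag_id': 'example_dag'})       # illustrative metric
stats.timing('task.duration_ms', 1234.0, tags={'task_id': 'example_task'})
stats.stop()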
def test_init(self):
    # Test compress_payload setting
    t = ThreadStats(compress_payload=True)
    t.start()
    assert t.reporter.compress_payload is True
    t.stop()
    # Default value
    t = ThreadStats()
    t.start()
    assert t.reporter.compress_payload is False
    t.stop()
def test_stop(self):
    dog = ThreadStats()
    dog.start(flush_interval=1, roll_up_interval=1)
    for i in range(10):
        dog.gauge("metric", i)
    time.sleep(2)
    flush_count = dog.flush_count
    assert flush_count
    dog.stop()
    for i in range(10):
        dog.gauge("metric", i)
    time.sleep(2)
    for i in range(10):
        dog.gauge("metric", i)
    time.sleep(2)
    # After stop(), at most one in-flight flush may still complete.
    assert dog.flush_count in [flush_count, flush_count + 1]
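For context, a minimal sketch of the ThreadStats lifecycle these tests exercise; the API key is a placeholder.

from datadog import initialize
from datadog.threadstats import ThreadStats

initialize(api_key='<YOUR_API_KEY>')  # placeholder credentials

stats = ThreadStats()
stats.start(flush_interval=10, roll_up_interval=10)  # aggregate and flush every 10 s
stats.gauge('example.queue_depth', 42, tags=['env:dev'])
stats.flush()  # force an immediate flush instead of waiting for the interval
stats.stop()   # stops the flusher; at most one in-flight flush may still land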
from datadog import initialize
from datadog.threadstats import ThreadStats
# Assumed import path for the metrics base class:
from sentry.metrics.base import MetricsBackend


class DatadogMetricsBackend(MetricsBackend):
    def __init__(self, prefix=None, **kwargs):
        # TODO(dcramer): it'd be nice if the initialize call wasn't a global
        initialize(**kwargs)
        self._stats = ThreadStats()
        self._stats.start()
        super(DatadogMetricsBackend, self).__init__(prefix=prefix)

    def __del__(self):
        self._stats.stop()

    def incr(self, key, amount=1, sample_rate=1):
        self._stats.increment(self._get_key(key), amount,
                              sample_rate=sample_rate)

    def timing(self, key, value, sample_rate=1):
        self._stats.timing(self._get_key(key), value,
                           sample_rate=sample_rate)
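A sketch of how this backend might be constructed; the keyword arguments are forwarded verbatim to datadog.initialize(), and the values here are placeholders.

backend = DatadogMetricsBackend(
    prefix='sentry.',          # optional metric-name prefix
    api_key='<YOUR_API_KEY>',  # placeholder, passed through to initialize()
)
backend.incr('jobs.started')
backend.timing('jobs.duration', 0.25)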
def check(self):
    logging.info('check info')
    try:
        yaml_file = os.environ.get(
            'DATADOG_CONF',
            '%s/aws_redshift_status.yaml' % config.get_confd_path())
        with open(yaml_file) as f:
            yaml_data = yaml.safe_load(f)
        init_config = yaml_data['init_config']
        interval = init_config.get('min_collection_interval', 300)

        stats = ThreadStats()
        stats.start(flush_interval=10, roll_up_interval=1, device=None,
                    flush_in_thread=False, flush_in_greenlet=False,
                    disabled=False)

        start = time.time()
        for instance in yaml_data['instances']:
            logging.debug('instance name is %s' % instance['name'])
            name, cluster_name, cluster_address, cluster_port, db_name, \
                user_name, user_password, aws_access_key_id, \
                aws_secret_access_key, aws_region, query, \
                tags = self._load_conf(instance)

            # Resolve the cluster endpoint through the AWS API when it is
            # not given explicitly in the instance configuration.
            if cluster_address is None and cluster_port is None:
                redshift = boto.redshift.connect_to_region(
                    aws_region,
                    aws_access_key_id=aws_access_key_id,
                    aws_secret_access_key=aws_secret_access_key)
                clusters = redshift.describe_clusters(cluster_name)
                if len(clusters) == 0:
                    raise Exception('Cluster is empty')
                cluster = clusters['DescribeClustersResponse'][
                    'DescribeClustersResult']['Clusters'][0]
                endpoint = cluster['Endpoint']
                cluster_address = endpoint['Address']
                cluster_port = endpoint['Port']

            conn = None
            try:
                connect_timeout = init_config.get('connect_timeout', 5)
                conn = psycopg2.connect(
                    host=cluster_address,
                    port=cluster_port,
                    database=db_name,
                    user=user_name,
                    password=user_password,
                    connect_timeout=connect_timeout,
                )

                today = datetime.datetime.utcnow()
                starttime = (today - datetime.timedelta(
                    seconds=interval)).strftime('%Y-%m-%d %H:%M:%S.%f')
                endtime = today.strftime('%Y-%m-%d %H:%M:%S.%f')

                results = self._db_query(conn, QUERY_TABLE_COUNT)
                stats.gauge('aws_redshift_status.table_count',
                            results[0][0], tags=tags)
                logging.debug('aws_redshift_status.table_count is %s'
                              % results[0][0])

                results = self._db_query(conn, QUERY_NODE)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('node:%s' % row[0])
                    stats.gauge('aws_redshift_status.node_slice',
                                row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.node_slice is %s'
                                  % row[1])

                results = self._db_query(conn, QUERY_TABLE_RECORD)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('table:%s' % row[0])
                    stats.gauge('aws_redshift_status.table_records',
                                row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_records is %s'
                                  % row[1])

                results = self._db_query(conn, QUERY_TABLE_STATUS)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('table:%s' % row[0])
                    stats.gauge('aws_redshift_status.table_status.size',
                                row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_status.size is %s'
                                  % row[1])
                    stats.gauge('aws_redshift_status.table_status.tbl_rows',
                                row[2], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_status.tbl_rows is %s'
                                  % row[2])
                    stats.gauge('aws_redshift_status.table_status.skew_rows',
                                row[3], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_status.skew_rows is %s'
                                  % row[3])

                for q in ['select', 'insert', 'update', 'delete', 'analyze']:
                    results = self._db_query(
                        conn,
                        QUERY_LOG_TYPE % (starttime, endtime, '%s %%' % q))
                    for row in results:
                        stats.gauge('aws_redshift_status.query.%s' % q,
                                    row[0], tags=tags)
                        logging.debug('aws_redshift_status.query.%s is %s'
                                      % (q, row[0]))

                running_time = time.time() - start
                stats.gauge('aws_redshift_status.response_time',
                            running_time, tags=tags)
                logging.debug('aws_redshift_status.response_time is %s'
                              % running_time)
            finally:
                if conn:
                    conn.close()

        stats.flush()
        stop = stats.stop()
        logging.debug('Stopping is %s' % stop)
    except Exception:
        logging.warning(sys.exc_info())
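Since check() loads its settings from aws_redshift_status.yaml, here is a sketch of the shape that file appears to take, inferred from the keys read above. _load_conf is not shown, so the instance fields are assumptions mirroring its unpacking order; all values are placeholders.

init_config:
  min_collection_interval: 300
  connect_timeout: 5

instances:
  - name: example                # assumed fields, matching the _load_conf unpacking
    cluster_name: my-cluster
    cluster_address:             # leave blank to resolve the endpoint via the AWS API
    cluster_port:
    db_name: analytics
    user_name: dd_agent
    user_password: '...'
    aws_access_key_id: '...'
    aws_secret_access_key: '...'
    aws_region: us-east-1
    query:
    tags:
      - 'cluster:my-cluster'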