def test_get_refresh_failure(monkeypatch, tmpdir):
    """A failed refresh must fall back to the cached token while it is still
    valid (with a warning), and propagate the error once it has expired."""
    creds_dir = str(tmpdir)
    tokens.configure(dir=creds_dir, url='https://example.org')
    with open(os.path.join(creds_dir, 'user.json'), 'w') as fd:
        json.dump({'application_username': '******', 'application_password': '******'}, fd)
    with open(os.path.join(creds_dir, 'client.json'), 'w') as fd:
        json.dump({'client_id': 'cid', 'client_secret': 'sec'}, fd)

    failure = Exception('FAIL')
    fake_response = MagicMock()
    fake_response.raise_for_status.side_effect = failure
    monkeypatch.setattr('requests.post', lambda url, **kwargs: fake_response)

    fake_logger = MagicMock()
    monkeypatch.setattr('tokens.logger', fake_logger)

    # token is still valid for 10 minutes
    tokens.TOKENS = {'mytok': {'access_token': 'oldtok',
                               'scopes': ['myscope'],
                               'expires_at': time.time() + (10 * 60)}}
    assert tokens.get('mytok') == 'oldtok'
    fake_logger.warn.assert_called_with(
        'Failed to refresh access token "%s" (but it is still valid): %s',
        'mytok', failure)

    # once the cached token is expired, the refresh error must surface
    tokens.TOKENS = {'mytok': {'scopes': ['myscope'], 'expires_at': 0}}
    with pytest.raises(Exception) as exc_info:
        tokens.get('mytok')
    assert exc_info.value == failure
def test_read_from_file(monkeypatch, tmpdir):
    """A managed token can be read from a ``<name>-token-secret`` file on disk."""
    tokens.configure(dir=str(tmpdir))
    secret_path = os.path.join(str(tmpdir), 'mytok-token-secret')
    with open(secret_path, 'w') as fd:
        fd.write('my-access-token\n')
    tokens.manage('mytok')
    # trailing newline in the file must be stripped by the library
    assert tokens.get('mytok') == 'my-access-token'
def test_get_refresh_failure_ignore_expiration(monkeypatch, tmpdir):
    """With ignore_expiration set, a failed refresh still yields the stale
    cached token and only logs a warning."""
    base = str(tmpdir)
    tokens.configure(dir=base, url='https://example.org')
    with open(os.path.join(base, 'user.json'), 'w') as fd:
        json.dump({'application_username': '******',
                   'application_password': '******'}, fd)
    with open(os.path.join(base, 'client.json'), 'w') as fd:
        json.dump({'client_id': 'cid', 'client_secret': 'sec'}, fd)

    boom = Exception('FAIL')
    resp = MagicMock()
    resp.raise_for_status.side_effect = boom
    monkeypatch.setattr('requests.post', lambda url, **kwargs: resp)

    log = MagicMock()
    monkeypatch.setattr('tokens.logger', log)

    # expired a long time ago..
    tokens.TOKENS = {'mytok': {'access_token': 'expired-token',
                               'ignore_expiration': True,
                               'scopes': ['myscope'],
                               'expires_at': 0}}
    assert tokens.get('mytok') == 'expired-token'
    log.warn.assert_called_with(
        'Failed to refresh access token "%s" (ignoring expiration): %s',
        'mytok', boom)
def test_get_refresh_failure_ignore_expiration_no_access_token(
        monkeypatch, tmpdir):
    """ignore_expiration only helps when an old token exists; with none cached
    the refresh error must propagate to the caller."""
    cfg_dir = str(tmpdir)
    tokens.configure(dir=cfg_dir, url='https://example.org')
    for fname, payload in (
            ('user.json', {'application_username': '******',
                           'application_password': '******'}),
            ('client.json', {'client_id': 'cid', 'client_secret': 'sec'})):
        with open(os.path.join(cfg_dir, fname), 'w') as fd:
            json.dump(payload, fd)

    refresh_error = Exception('FAIL')
    resp = MagicMock()
    resp.raise_for_status.side_effect = refresh_error
    monkeypatch.setattr('requests.post', lambda url, **kwargs: resp)

    # we never got any access token; entry expired a long time ago..
    tokens.TOKENS = {'mytok': {'ignore_expiration': True,
                               'scopes': ['myscope'],
                               'expires_at': 0}}
    with pytest.raises(Exception) as exc_info:
        tokens.get('mytok')
    assert exc_info.value == refresh_error
def test_refresh_without_configuration():
    """refresh() must raise ConfigurationError when no token URL is configured."""
    # remove URL config
    tokens.configure(dir='', url='')
    tokens.manage('mytok', ['scope'])
    with pytest.raises(tokens.ConfigurationError) as exc_info:
        tokens.refresh('mytok')
    expected = ('Configuration error: Missing OAuth access token URL. '
                'Either set OAUTH2_ACCESS_TOKEN_URL or use tokens.configure(url=..).')
    assert str(exc_info.value) == expected
def test_refresh_invalid_response(monkeypatch, tmpdir):
    """A token endpoint response missing "expires_in"/"access_token" (or with an
    empty token) must raise InvalidTokenResponse; also verifies the exact HTTP
    call made to the endpoint, including the connect/read timeout tuple."""
    tokens.configure(dir=str(tmpdir), url='https://example.org')
    tokens.manage('mytok', ['myscope'])
    tokens.start()  # this does not do anything..
    # first response: JSON object without the required keys
    response = MagicMock()
    response.json.return_value = {'foo': 'bar'}
    post = MagicMock()
    post.return_value = response
    monkeypatch.setattr('requests.post', post)
    monkeypatch.setattr('tokens.read_credentials', lambda path: (VALID_USER_JSON, VALID_CLIENT_JSON))
    with pytest.raises(tokens.InvalidTokenResponse) as exc_info:
        tokens.get('mytok')
    assert str(exc_info.value) == """Invalid token response: Expected a JSON object with keys "expires_in" and "access_token": 'expires_in'"""
    # verify that we use a proper HTTP timeout..
    post.assert_called_with('https://example.org',
                            data={'username': '******', 'scope': 'myscope', 'password': '******', 'grant_type': 'password'},
                            headers={'User-Agent': 'python-tokens/{}'.format(tokens.__version__)},
                            timeout=(1.25, 2.25),
                            auth=('cid', 'sec'))
    # second response: keys present but the access_token value is empty
    response.json.return_value = {'access_token': '', 'expires_in': 100}
    with pytest.raises(tokens.InvalidTokenResponse) as exc_info:
        tokens.get('mytok')
    assert str(exc_info.value) == 'Invalid token response: Empty "access_token" value'
def test_read_from_file_fail(monkeypatch, tmpdir):
    """from_file_only with no token file on disk -> InvalidCredentialsError."""
    tokens.configure(dir=str(tmpdir), from_file_only=True)
    tokens.manage('mytok')
    with pytest.raises(tokens.InvalidCredentialsError) as exc_info:
        tokens.get('mytok')
    expected = 'Invalid OAuth credentials: Failed to read token "mytok" from {}.'.format(str(tmpdir))
    assert str(exc_info.value) == expected
def get_token(url: str, scopes: str, credentials_dir: str) -> str:
    """ Get access token info.

    :param url: OAuth2 access token endpoint URL
    :param scopes: a single scope string (wrapped in a list for tokens.manage)
    :param credentials_dir: directory holding the OAuth credential files
    :return: the access token for the managed 'lizzy' token

    NOTE(review): the original return annotation said ``dict``, but
    ``tokens.get()`` returns the plain access-token string (see other callers
    in this codebase that wrap it as ``{'access_token': tokens.get(...)}``) —
    confirm against the tokens library version in use.
    """
    tokens.configure(url=url, dir=credentials_dir)
    tokens.manage('lizzy', [scopes])
    tokens.start()
    return tokens.get('lizzy')
def test_refresh_invalid_credentials(monkeypatch, tmpdir):
    """Credentials file missing the password key -> InvalidCredentialsError."""
    workdir = str(tmpdir)
    tokens.configure(dir=workdir, url='https://example.org')
    tokens.manage('mytok', ['myscope'])
    tokens.start()  # this does not do anything..
    # missing password
    with open(os.path.join(workdir, 'user.json'), 'w') as fd:
        json.dump({'application_username': '******'}, fd)
    with open(os.path.join(workdir, 'client.json'), 'w') as fd:
        json.dump({'client_id': 'cid', 'client_secret': 'sec'}, fd)
    with pytest.raises(tokens.InvalidCredentialsError) as exc_info:
        tokens.get('mytok')
    assert str(exc_info.value) == "Invalid OAuth credentials: Missing key: 'application_password'"
def use_berry_token(app_name):
    """Configure the tokens library for the Nakadi scopes and register a token
    provider that returns an empty string when token retrieval fails."""
    tokens.configure()
    tokens.manage(app_name, ['nakadi.event_stream.read',
                             'nakadi.event_stream.write',
                             'nakadi.event_type.write',
                             'uid'])
    tokens.start()

    def _get_token():
        # best-effort: never let a token failure propagate to the caller
        try:
            return tokens.get(app_name)
        except Exception as e:
            logging.error('Failed to get token for {}'.format(app_name), exc_info=e)
            return ''

    set_provider(_get_token)
def configure(self, conf):
    """
    Called after plugin is loaded to pass the [configuration] section in
    their plugin info file

    :param conf: configuration dictionary
    """
    # will use OAUTH2_ACCESS_TOKEN_URL environment variable by default
    # will try to read application credentials from CREDENTIALS_DIR
    tokens.configure()
    token_configuration = conf.get('oauth2.tokens')
    if token_configuration:
        # format: "name1=scopeA,scopeB:name2=scopeC"
        for entry in token_configuration.split(':'):
            name, scope_csv = entry.split('=', 1)
            tokens.manage(name, scope_csv.split(','))
    tokens.manage('uid', ['uid'])
    tokens.start()
def test_refresh(monkeypatch, tmpdir):
    """Happy-path refresh: a mocked OAuth endpoint hands out a fresh token;
    an empty URL first raises ConfigurationError."""
    path = str(tmpdir)
    tokens.configure(dir=path, url='')
    tokens.manage('mytok', ['myscope'])
    with pytest.raises(tokens.ConfigurationError):
        tokens.refresh('mytok')

    tokens.configure(dir=path, url='https://example.org')
    with open(os.path.join(path, 'user.json'), 'w') as fd:
        json.dump({'application_username': '******', 'application_password': '******'}, fd)
    with open(os.path.join(path, 'client.json'), 'w') as fd:
        json.dump({'client_id': 'cid', 'client_secret': 'sec'}, fd)

    response = MagicMock()
    response.json.return_value = {'expires_in': 123123, 'access_token': '777'}
    monkeypatch.setattr('requests.post', lambda url, **kwargs: response)
    assert tokens.get('mytok') == '777'
def test_get_refresh_failure_ignore_expiration_no_access_token(monkeypatch, tmpdir):
    """Without any previously obtained token, ignore_expiration cannot help:
    the refresh failure must be raised."""
    directory = str(tmpdir)
    tokens.configure(dir=directory, url='https://example.org')
    with open(os.path.join(directory, 'user.json'), 'w') as fd:
        json.dump({'application_username': '******',
                   'application_password': '******'}, fd)
    with open(os.path.join(directory, 'client.json'), 'w') as fd:
        json.dump({'client_id': 'cid', 'client_secret': 'sec'}, fd)

    err = Exception('FAIL')
    mocked = MagicMock()
    mocked.raise_for_status.side_effect = err
    monkeypatch.setattr('requests.post', lambda url, **kwargs: mocked)

    # we never got any access token; expired a long time ago..
    tokens.TOKENS = {'mytok': {'ignore_expiration': True,
                               'scopes': ['myscope'],
                               'expires_at': 0}}
    with pytest.raises(Exception) as exc_info:
        tokens.get('mytok')
    assert exc_info.value == err
def test_get_refresh_failure_ignore_expiration(monkeypatch, tmpdir):
    """When ignore_expiration is set, an expired cached token is served even
    though the refresh failed; a warning is logged."""
    where = str(tmpdir)
    tokens.configure(dir=where, url='https://example.org')
    credential_files = {
        'user.json': {'application_username': '******', 'application_password': '******'},
        'client.json': {'client_id': 'cid', 'client_secret': 'sec'},
    }
    for fname in ('user.json', 'client.json'):
        with open(os.path.join(where, fname), 'w') as fd:
            json.dump(credential_files[fname], fd)

    refresh_exc = Exception('FAIL')
    post_response = MagicMock()
    post_response.raise_for_status.side_effect = refresh_exc
    monkeypatch.setattr('requests.post', lambda url, **kwargs: post_response)

    captured_logger = MagicMock()
    monkeypatch.setattr('tokens.logger', captured_logger)

    # entry expired a long time ago..
    tokens.TOKENS = {'mytok': {'access_token': 'expired-token',
                               'ignore_expiration': True,
                               'scopes': ['myscope'],
                               'expires_at': 0}}
    assert tokens.get('mytok') == 'expired-token'
    captured_logger.warn.assert_called_with(
        'Failed to refresh access token "%s" (ignoring expiration): %s',
        'mytok', refresh_exc)
import requests import tokens from opentracing_utils import trace, extract_span_from_kwargs from zmon_worker_monitor.zmon_worker.encoder import JsonDataEncoder from zmon_worker_monitor.zmon_worker.errors import NotificationError from zmon_worker_monitor.zmon_worker.common.http import is_absolute_http_url, get_user_agent from notification import BaseNotification logger = logging.getLogger(__name__) tokens.configure() tokens.manage('uid', ['uid']) class NotifyHttp(BaseNotification): @classmethod @trace(operation_name='notification_http', pass_span=True, tags={'notification': 'http'}) def notify(cls, alert, url=None, body=None, params=None, headers=None, timeout=5, oauth2=False, include_alert=True, repeat=0, **kwargs): current_span = extract_span_from_kwargs(**kwargs) urls = cls._config.get('notifications.http.whitelist.urls', []) allow_any = cls._config.get('notifications.http.allow.all', False) default_url = cls._config.get('notifications.http.default.url', None)
def test_read_from_file_fail_raise(monkeypatch, tmpdir):
    """A token 'file' that is actually a directory -> the underlying IOError
    escapes from tokens.get()."""
    tokens.configure(dir=str(tmpdir))
    # create a directory where the secret file is expected
    os.mkdir(os.path.join(str(tmpdir), 'mytok-token-secret'))
    tokens.manage('mytok')
    with pytest.raises(IOError) as exc_info:
        tokens.get('mytok')
#!/usr/bin/env python3
"""Poll the OAuth tokeninfo endpoint with a refreshed 'marauder' token and log
(hashed) evidence whenever the 'stups_marauder' service user is observed."""
import tokens
import datetime
import requests
import logging
import hashlib

tokens.configure(dir='./meta/credentials',
                 url='https://auth.zalando.com/oauth2/access_token?realm=/services')
tokens.manage('marauder', ['uid'])

logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(message)s')

while True:
    a = tokens.refresh('marauder')
    a = a['data']
    data = requests.get(
        'https://auth.zalando.com/oauth2/tokeninfo?access_token={}'.format(a['access_token'])).json()
    if data['uid'] == 'stups_marauder':
        # log only SHA1 hashes of the token material, never the raw values
        tokenhash = '[ERROR] - HASHED ACCESS_TOKEN: {0} - BEARER: {1}'.format(
            hashlib.sha1((a['access_token']).encode()).hexdigest(),
            hashlib.sha1(str(a).encode()).hexdigest())
        logging.error(tokenhash)
        for i in range(3):
            t = requests.get(
                'https://auth.zalando.com/oauth2/tokeninfo?access_token={}'.format(a['access_token'])).json()
            # BUG FIX: the message claimed "HASHED ACCESS TOKEN" but logged the
            # raw token value; hash it like the branch above does.
            logging.error('[DEBUG] - CAPTURED TOKEN FOR: \" {0}\" WITH HASHED ACCESS TOKEN: {1}'.format(
                t['uid'], hashlib.sha1(t['access_token'].encode()).hexdigest()))
    else:
        # BUG FIX: the original had "if x == A / elif x != A / else" — the
        # third branch ('[UNKONWN ERROR]') was unreachable; collapsed to else.
        logging.warning('[OK]')
def main():
    """Entry point of the ZMON AWS agent (opentracing variant): discover AWS
    resources in one account/region and sync them as entities into the ZMON
    entity service, reporting progress on a root tracing span."""
    argp = argparse.ArgumentParser(description='ZMON AWS Agent')
    argp.add_argument('-e', '--entity-service', dest='entityservice')
    argp.add_argument('-r', '--region', dest='region', default=None)
    argp.add_argument('-j', '--json', dest='json', action='store_true')
    argp.add_argument('-t', '--tracer', dest='tracer',
                      default=os.environ.get('OPENTRACING_TRACER', 'noop'))
    argp.add_argument('--no-oauth2', dest='disable_oauth2', action='store_true', default=False)
    argp.add_argument('--postgresql-user', dest='postgresql_user',
                      default=os.environ.get('AGENT_POSTGRESQL_USER'))
    argp.add_argument('--postgresql-pass', dest='postgresql_pass',
                      default=os.environ.get('AGENT_POSTGRESQL_PASS'))
    args = argp.parse_args()

    if not args.disable_oauth2:
        tokens.configure()
        tokens.manage('uid', ['uid'])
        tokens.start()

    init_opentracing_tracer(args.tracer)
    root_span = opentracing.tracer.start_span(operation_name='aws_entity_discovery')
    with root_span:
        logging.basicConfig(level=logging.INFO)

        # 0. Fetch extra data for entities
        entity_extras = {}
        for ex in os.getenv('EXTRA_ENTITY_FIELDS', '').split(','):
            if '=' not in ex:
                continue
            k, v = ex.split('=', 1)
            if k and v:
                entity_extras[k] = v

        # 1. Determine region
        if not args.region:
            logger.info('Trying to figure out region..')
            try:
                response = requests.get(
                    'http://169.254.169.254/latest/meta-data/placement/availability-zone',
                    timeout=2)
            except Exception:
                root_span.set_tag('error', True)
                root_span.log_kv({'exception': traceback.format_exc()})
                # NOTE(review): the two string halves concatenate without a
                # space ("...and" + "can not...") — cosmetic message glitch
                logger.exception(
                    'Region was not specified as a parameter and' +
                    'can not be fetched from instance meta-data!')
                raise
            # availability zone like "eu-west-1a" -> drop the trailing letter
            region = response.text[:-1]
        else:
            region = args.region

        root_span.set_tag('region', region)
        logger.info('Using region: {}'.format(region))
        logger.info('Entity service URL: %s', args.entityservice)
        logger.info('Reading DNS data for hosted zones')
        aws.populate_dns_data()

        aws_account_id = aws.get_account_id(region)
        infrastructure_account = 'aws:{}'.format(aws_account_id) if aws_account_id else None
        if not infrastructure_account:
            logger.error('AWS agent: Cannot determine infrastructure account ID. Terminating!')
            return
        root_span.set_tag('account', infrastructure_account)

        # 2. ZMON entities
        # NOTE(review): `token` is only bound when OAuth2 is enabled; running
        # with --no-oauth2 would raise NameError on the next line — confirm
        # whether a `token = None` default is missing here.
        if not args.disable_oauth2:
            token = os.getenv('ZMON_TOKEN', None) or tokens.get('uid')
        zmon_client = Zmon(args.entityservice, token=token, user_agent=get_user_agent())

        query = {'infrastructure_account': infrastructure_account, 'region': region, 'created_by': 'agent'}
        entities = zmon_client.get_entities(query)

        # 3. Get running apps
        apps = aws.get_running_apps(region, entities)

        elbs = []
        scaling_groups = []
        rds = []
        elasticaches = []
        dynamodbs = []
        sqs = []

        new_entities = []
        to_be_removed = []

        # NOTE(review): certificates, aws_limits and postgresql_clusters are
        # only bound inside this branch but used unconditionally below — with
        # zero running apps this raises NameError; confirm intended.
        if len(apps) > 0:
            elbs = aws.get_running_elbs(region, infrastructure_account)
            scaling_groups = aws.get_auto_scaling_groups(region, infrastructure_account)
            rds = aws.get_rds_instances(region, infrastructure_account, entities)
            elasticaches = aws.get_elasticache_nodes(region, infrastructure_account)
            dynamodbs = aws.get_dynamodb_tables(region, infrastructure_account)
            certificates = aws.get_certificates(region, infrastructure_account)
            aws_limits = aws.get_limits(region, infrastructure_account, apps, elbs, entities)
            sqs = aws.get_sqs_queues(region, infrastructure_account, entities)
            postgresql_clusters = postgresql.get_postgresql_clusters(
                region, infrastructure_account, scaling_groups, apps)

        account_alias = aws.get_account_alias(region)
        ia_entity = {
            'type': 'local',
            'infrastructure_account': infrastructure_account,
            'account_alias': account_alias,
            'region': region,
            'id': 'aws-ac[{}:{}]'.format(infrastructure_account, region),
            'created_by': 'agent',
        }

        # derive the owning team name from the account alias (minus prefix)
        account_alias_prefix = os.getenv('ACCOUNT_ALIAS_PREFIX', None)
        owner = account_alias
        if account_alias_prefix:
            owner = owner.replace(account_alias_prefix, '', 1)
        root_span.set_tag('team', owner)

        application_entities = aws.get_apps_from_entities(apps, infrastructure_account, region)

        if args.postgresql_user and args.postgresql_pass:
            postgresql_databases = postgresql.get_databases_from_clusters(
                postgresql_clusters, infrastructure_account, region,
                args.postgresql_user, args.postgresql_pass)
        else:
            # Pretend the list of DBs is empty, but also make sure we don't remove
            # any pre-existing database entities because we don't know about them.
            postgresql_databases = []
            entities = [e for e in entities if e.get('type') != 'postgresql_database']

        current_entities = (elbs + scaling_groups + apps + application_entities +
                            rds + postgresql_databases + postgresql_clusters +
                            elasticaches + dynamodbs + certificates + sqs)
        current_entities.append(aws_limits)
        current_entities.append(ia_entity)

        # attach operator-supplied extra fields to every entity
        for entity in current_entities:
            entity.update(entity_extras)

        # 4. Removing missing entities
        existing_ids = get_existing_ids(entities)
        current_entities_ids = {e['id'] for e in current_entities}

        to_be_removed, delete_error_count = remove_missing_entities(
            existing_ids, current_entities_ids, zmon_client, json=args.json)

        root_span.log_kv({'total_entitites': str(len(current_entities))})
        root_span.log_kv({'removed_entities': str(len(to_be_removed))})
        logger.info('Found {} removed entities from {} entities ({} failed)'.format(
            len(to_be_removed), len(current_entities), delete_error_count))

        # 5. Get new/updated entities
        new_entities, add_error_count = add_new_entities(
            current_entities, entities, zmon_client, json=args.json)
        root_span.log_kv({'new_entities': str(len(new_entities))})
        logger.info('Found {} new entities from {} entities ({} failed)'.format(
            len(new_entities), len(current_entities), add_error_count))

        # 6. Always add Local entity
        if not args.json:
            ia_entity['errors'] = {'delete_count': delete_error_count, 'add_count': add_error_count}
            update_local_entity(zmon_client, ia_entity)

        types = {e['type']: len([t for t in new_entities if t['type'] == e['type']])
                 for e in new_entities}
        for t, v in types.items():
            logger.info('Found {} new entities of type: {}'.format(v, t))

        # Check if it is a dry run!
        if args.json:
            d = {
                'applications': application_entities,
                'apps': apps,
                'dynamodb': dynamodbs,
                'elbs': elbs,
                'elc': elasticaches,
                'rds': rds,
                'certificates': certificates,
                'aws_limits': aws_limits,
                'sqs_queues': sqs,
                'new_entities': new_entities,
                'to_be_removed': to_be_removed,
                'posgresql_clusters': postgresql_clusters
            }
            print(json.dumps(d, indent=4))
import os import requests import tokens from zmon_worker_monitor.zmon_worker.errors import ConfigurationError from zmon_worker_monitor.builtins.plugins.distance_to_history import DistanceWrapper from zmon_worker_monitor.adapters.ifunctionfactory_plugin import IFunctionFactoryPlugin, propartial logger = logging.getLogger(__name__) # will use OAUTH2_ACCESS_TOKEN_URL environment variable by default # will try to read application credentials from CREDENTIALS_DIR tokens.configure() tokens.manage('uid', ['uid']) tokens.start() ONE_WEEK = 7 * 24 * 60 * 60 ONE_WEEK_AND_5MIN = ONE_WEEK + 5 * 60 DATAPOINTS_ENDPOINT = 'api/v1/datapoints/query' class HistoryFactory(IFunctionFactoryPlugin): def __init__(self): super(HistoryFactory, self).__init__() # fields from configuration def configure(self, conf):
import requests import sys import urllib import urlparse import logging import os from collections import defaultdict from zmon_worker_monitor.zmon_worker.errors import HttpError from requests.adapters import HTTPAdapter from zmon_worker_monitor.adapters.ifunctionfactory_plugin import IFunctionFactoryPlugin, propartial import tokens tokens.configure(url=os.environ.get('OAUTH_TOKEN_SERVICE_URL','')) tokens.manage('zmon', ['zmon']) tokens.start() logger = logging.getLogger('zmon-worker.http-function') class HttpFactory(IFunctionFactoryPlugin): def __init__(self): super(HttpFactory, self).__init__() def configure(self, conf): """ Called after plugin is loaded to pass the [configuration] section in their plugin info file :param conf: configuration dictionary """
def get_token(config: dict, token_name: str, scopes: list):
    """Obtain an OAuth token either directly from the OAuth provider (when
    client credentials are available) or via the custom Token Service fallback.

    :param config: parsed Taupage YAML configuration
    :param token_name: name under which the token is managed/requested
    :param scopes: list of OAuth scopes to request
    :return: dict with 'access_token' when using the OAuth provider directly,
             the Token Service response otherwise, or None when configuration
             is incomplete
    :raises Exception: when the Token Service fallback exhausts its retries
    """
    oauth_access_token_url = config.get('oauth_access_token_url')
    token_url = config.get('token_service_url')
    if not oauth_access_token_url:
        logging.warning(
            'No OAuth access token URL configured in Taupage YAML ("oauth_access_token_url" property)'
        )
    if not token_url:
        logging.warning(
            'No token service URL configured in Taupage YAML ("token_service_url" property)'
        )
    if not oauth_access_token_url and not token_url:
        # neither of the URLs is given, no chance to continue
        return
    if not config.get('mint_bucket'):
        # berry will only be started if a mint bucket is configured,
        # skip OAuth token retrieval if this is not the case
        logging.warning(
            'No mint bucket configured in Taupage YAML ("mint_bucket" property)'
        )
        return

    user_path = os.path.join(CREDENTIALS_DIR, 'user.json')
    client_path = os.path.join(CREDENTIALS_DIR, 'client.json')
    # NOTE(review): this blocks forever if berry never writes the file
    while not os.path.exists(user_path):
        logging.info(
            'Waiting for berry to download OAuth credentials to {}..'.format(
                user_path))
        time.sleep(5)

    with open(user_path) as fd:
        user_credentials = json.load(fd)
    user = user_credentials.get('application_username')
    passwd = user_credentials.get('application_password')
    if not user or not passwd:
        logging.warning(
            'Invalid OAuth user credentials: application user and/or password missing in %s',
            user_path)
        return

    try:
        with open(client_path) as fd:
            client_credentials = json.load(fd)
    # BUG FIX: narrowed from a bare "except:", which would also swallow
    # SystemExit/KeyboardInterrupt
    except Exception:
        logging.warning('Invalid OAuth client credentials: could not read %s',
                        client_path)
        # we might continue as Token Service does not require client credentials
        client_credentials = {}

    client_id = client_credentials.get('client_id')
    if client_id and oauth_access_token_url:
        # we have a client_id and the OAuth provider's URL
        # => we can use the OAuth provider directly
        # NOTE: the client_secret can be null
        tokens.configure(url=oauth_access_token_url, dir=CREDENTIALS_DIR)
        tokens.manage(token_name, scopes)
        access_token = tokens.get(token_name)
        return {'access_token': access_token}
    else:
        # fallback to custom Token Service
        # Token Service only requires user and password
        num_retries = 3
        token = False
        while num_retries > 0:
            try:
                token = zign.api.get_named_token(scopes, 'services', token_name,
                                                 user, passwd, url=token_url,
                                                 use_keyring=False)
                break
            except zign.api.ServerError as e:
                logging.info(
                    'Encountered error while obtaining token {}, will retry {} times. {}'
                    .format(token_name, num_retries, e))
                num_retries -= 1
                time.sleep(30)
        if not token:
            raise Exception('Could not obtain token {}'.format(token_name))
        return token
def main():
    """Entry point of the (legacy) ZMon AWS agent: discover AWS resources in
    one account/region and push them to the entity service via plain HTTP."""
    argp = argparse.ArgumentParser(description='ZMon AWS Agent')
    argp.add_argument('-e', '--entity-service', dest='entityservice')
    argp.add_argument('-r', '--region', dest='region', default=None)
    argp.add_argument('-j', '--json', dest='json', action='store_true')
    argp.add_argument('--no-oauth2', dest='disable_oauth2', action='store_true', default=False)
    args = argp.parse_args()

    if not args.disable_oauth2:
        tokens.configure()
        tokens.manage('uid', ['uid'])
        tokens.start()

    logging.basicConfig(level=logging.INFO)

    if not args.region:
        logging.info("Trying to figure out region...")
        try:
            response = requests.get('http://169.254.169.254/latest/meta-data/placement/availability-zone', timeout=2)
        # BUG FIX: narrowed from a bare "except:" (would also catch SystemExit)
        except Exception:
            logging.error("Region was not specified as a parameter and can not be fetched from instance meta-data!")
            raise
        # availability zone like "eu-west-1a" -> drop the trailing letter
        region = response.text[:-1]
    else:
        region = args.region

    logging.info("Using region: {}".format(region))
    logging.info("Entity service url: %s", args.entityservice)

    apps = get_running_apps(region)

    # BUG FIX: infrastructure_account, elasticaches and dynamodbs were left
    # unbound when no apps were found, causing a NameError further down
    # (both in the --json dump and at the `infrastructure_account is not None`
    # check). Initialize everything up front.
    infrastructure_account = None
    elbs = []
    scaling_groups = []
    rds = []
    elasticaches = []
    dynamodbs = []

    if len(apps) > 0:
        infrastructure_account = apps[0]['infrastructure_account']
        elbs = get_running_elbs(region, infrastructure_account)
        scaling_groups = get_auto_scaling_groups(region, infrastructure_account)
        rds = get_rds_instances(region, infrastructure_account)
        elasticaches = get_elasticache_nodes(region, infrastructure_account)
        dynamodbs = get_dynamodb_tables(region, infrastructure_account)

    if args.json:
        # dry run: just dump everything we discovered
        d = {'apps': apps, 'elbs': elbs, 'rds': rds, 'elc': elasticaches, 'dynamodb': dynamodbs}
        print(json.dumps(d))
    else:
        if infrastructure_account is not None:
            account_alias = get_account_alias(region)
            ia_entity = {"type": "local",
                         "infrastructure_account": infrastructure_account,
                         "account_alias": account_alias,
                         "region": region,
                         "id": "aws-ac[{}:{}]".format(infrastructure_account, region),
                         "created_by": "agent"}

            application_entities = get_apps_from_entities(apps, infrastructure_account, region)

            # collect the ids of everything we discovered this run
            current_entities = []
            for e in elbs:
                current_entities.append(e["id"])
            for e in scaling_groups:
                current_entities.append(e["id"])
            for a in apps:
                current_entities.append(a["id"])
            for a in application_entities:
                current_entities.append(a["id"])
            for a in rds:
                current_entities.append(a["id"])
            for a in elasticaches:
                current_entities.append(a["id"])
            for a in dynamodbs:
                current_entities.append(a["id"])
            current_entities.append(ia_entity["id"])

            headers = {'Content-Type': 'application/json'}
            if not args.disable_oauth2:
                token = os.getenv('ZMON_AGENT_TOKEN', tokens.get('uid'))
                # only the first and last characters of the token are logged
                logging.info("Adding oauth2 token to requests {}...{}".format(token[:1], token[-1:]))
                headers.update({'Authorization': 'Bearer {}'.format(token)})

            # removing all entities
            query = {'infrastructure_account': infrastructure_account, 'region': region, 'created_by': 'agent'}
            r = requests.get(args.entityservice, params={'query': json.dumps(query)}, headers=headers, timeout=10)
            entities = r.json()

            existing_entities = {}
            to_remove = []
            for e in entities:
                existing_entities[e['id']] = e
                if not e["id"] in current_entities:
                    to_remove.append(e["id"])

            if os.getenv('zmon_user'):
                auth = (os.getenv('zmon_user'), os.getenv('zmon_password', ''))
            else:
                auth = None

            for e in to_remove:
                logging.info("removing instance: {}".format(e))
                r = requests.delete(args.entityservice + "{}/".format(e), auth=auth, headers=headers, timeout=3)
                logging.info("...%s", r.status_code)

            def put_entity(entity_type, entity):
                # upsert one entity into the entity service
                logging.info("Adding {} entity: {}".format(entity_type, entity['id']))
                r = requests.put(args.entityservice, auth=auth,
                                 data=json.dumps(entity, default=json_serial),
                                 headers=headers, timeout=3)
                logging.info("...%s", r.status_code)

            put_entity('LOCAL', ia_entity)

            for instance in apps:
                put_entity('instance', instance)
            for asg in scaling_groups:
                put_entity('Auto Scaling group', asg)
            for elb in elbs:
                put_entity('elastic load balancer', elb)
            for db in rds:
                put_entity('RDS instance', db)

            # merge here or we loose it on next pull
            for app in application_entities:
                if app['id'] in existing_entities:
                    ex = existing_entities[app['id']]
                    if 'scalyr_ts_id' in ex:
                        app['scalyr_ts_id'] = ex['scalyr_ts_id']

            for app in application_entities:
                put_entity('application', app)
            for elasticache in elasticaches:
                put_entity('elasticache', elasticache)
            for dynamodb in dynamodbs:
                put_entity('dynamodb', dynamodb)
def get_token(config: dict, token_name: str, scopes: list):
    """Obtain an OAuth token either directly from the OAuth provider (when
    client credentials exist) or via the custom Token Service fallback.

    :param config: parsed Taupage YAML configuration
    :param token_name: name under which the token is managed/requested
    :param scopes: list of OAuth scopes to request
    :return: dict with 'access_token' (direct provider), the Token Service
             response, or None when configuration is incomplete
    :raises Exception: when the Token Service fallback exhausts its retries
    """
    oauth_access_token_url = config.get('oauth_access_token_url')
    token_url = config.get('token_service_url')
    if not oauth_access_token_url:
        logging.warning('No OAuth access token URL configured in Taupage YAML ("oauth_access_token_url" property)')
    if not token_url:
        logging.warning('No token service URL configured in Taupage YAML ("token_service_url" property)')
    if not oauth_access_token_url and not token_url:
        # neither of the URLs is given, no chance to continue
        return
    if not config.get('mint_bucket'):
        # berry will only be started if a mint bucket is configured,
        # skip OAuth token retrieval if this is not the case
        logging.warning('No mint bucket configured in Taupage YAML ("mint_bucket" property)')
        return

    user_path = os.path.join(CREDENTIALS_DIR, 'user.json')
    client_path = os.path.join(CREDENTIALS_DIR, 'client.json')
    # NOTE(review): this blocks forever if berry never writes the file
    while not os.path.exists(user_path):
        logging.info('Waiting for berry to download OAuth credentials to {}..'.format(user_path))
        time.sleep(5)

    with open(user_path) as fd:
        user_credentials = json.load(fd)
    user = user_credentials.get('application_username')
    passwd = user_credentials.get('application_password')
    if not user or not passwd:
        logging.warning('Invalid OAuth user credentials: application user and/or password missing in %s', user_path)
        return

    try:
        with open(client_path) as fd:
            client_credentials = json.load(fd)
    # BUG FIX: narrowed from a bare "except:", which would also swallow
    # SystemExit/KeyboardInterrupt
    except Exception:
        logging.warning('Invalid OAuth client credentials: could not read %s', client_path)
        # we might continue as Token Service does not require client credentials
        client_credentials = {}

    client_id = client_credentials.get('client_id')
    if client_id and oauth_access_token_url:
        # we have a client_id and the OAuth provider's URL
        # => we can use the OAuth provider directly
        # NOTE: the client_secret can be null
        tokens.configure(url=oauth_access_token_url, dir=CREDENTIALS_DIR)
        tokens.manage(token_name, scopes)
        access_token = tokens.get(token_name)
        return {'access_token': access_token}
    else:
        # fallback to custom Token Service
        # Token Service only requires user and password
        num_retries = 3
        token = False
        while num_retries > 0:
            try:
                token = zign.api.get_named_token(
                    scopes, 'services', token_name, user, passwd,
                    url=token_url, use_keyring=False)
                break
            except zign.api.ServerError as e:
                logging.info('Encountered error while obtaining token {}, will retry {} times. {}'.format(
                    token_name, num_retries, e))
                num_retries -= 1
                time.sleep(30)
        if not token:
            raise Exception('Could not obtain token {}'.format(token_name))
        return token
#!/usr/bin/env python
"""Log in to Pier One with an OAuth token and pull the Docker image given as
<team> <artifact> <tag> command-line arguments."""
import docker
import pierone.api
import sys
import tokens

pierone_url = "https://pierone.stups.zalan.do"
credentials_dir = "/meta/credentials"
oauth_access_token_url = "https://token.services.auth.zalando.com/oauth2/access_token?realm=/services"

tokens.configure(url=oauth_access_token_url, dir=credentials_dir)
tokens.manage("pierone", ["uid"])

# authenticate the local Docker daemon against the registry
pierone.api.docker_login_with_token(pierone_url, tokens.get("pierone"))

client = docker.from_env(version='1.24')
image_ref = "pierone.stups.zalan.do/{}/{}:{}".format(*sys.argv[1:4])
client.images.pull(image_ref)
def main():
    """Entry point of the ZMON AWS agent: discover AWS resources in one
    account/region and sync them as entities into the ZMON entity service."""
    argp = argparse.ArgumentParser(description='ZMON AWS Agent')
    argp.add_argument('-e', '--entity-service', dest='entityservice')
    argp.add_argument('-r', '--region', dest='region', default=None)
    argp.add_argument('-j', '--json', dest='json', action='store_true')
    argp.add_argument('--no-oauth2', dest='disable_oauth2', action='store_true', default=False)
    argp.add_argument('--postgresql-user', dest='postgresql_user',
                      default=os.environ.get('AGENT_POSTGRESQL_USER'))
    argp.add_argument('--postgresql-pass', dest='postgresql_pass',
                      default=os.environ.get('AGENT_POSTGRESQL_PASS'))
    args = argp.parse_args()

    if not args.disable_oauth2:
        tokens.configure()
        tokens.manage('uid', ['uid'])
        tokens.start()

    logging.basicConfig(level=logging.INFO)

    # 1. Determine region
    if not args.region:
        logger.info('Trying to figure out region..')
        try:
            response = requests.get(
                'http://169.254.169.254/latest/meta-data/placement/availability-zone',
                timeout=2)
        # BUG FIX: narrowed from a bare "except:" (would also catch SystemExit)
        except Exception:
            logger.exception(
                'Region was not specified as a parameter and can not be fetched from instance meta-data!'
            )
            raise
        # availability zone like "eu-west-1a" -> drop the trailing letter
        region = response.text[:-1]
    else:
        region = args.region

    logger.info('Using region: {}'.format(region))
    logger.info('Entity service URL: %s', args.entityservice)
    logger.info('Reading DNS data for hosted zones')
    aws.populate_dns_data()

    aws_account_id = aws.get_account_id(region)
    infrastructure_account = 'aws:{}'.format(aws_account_id) if aws_account_id else None
    if not infrastructure_account:
        logger.error('AWS agent: Cannot determine infrastructure account ID. Terminating!')
        return

    # 2. ZMON entities
    token = None if args.disable_oauth2 else tokens.get('uid')
    zmon_client = Zmon(args.entityservice, token=token, user_agent=get_user_agent())

    query = {'infrastructure_account': infrastructure_account, 'region': region, 'created_by': 'agent'}
    entities = zmon_client.get_entities(query)

    # 3. Get running apps
    apps = aws.get_running_apps(region, entities)

    elbs = []
    scaling_groups = []
    rds = []
    elasticaches = []
    dynamodbs = []
    sqs = []

    new_entities = []
    to_be_removed = []

    # NOTE(review): certificates and aws_limits are only bound in this branch
    # but used unconditionally below — with zero running apps this raises
    # NameError; confirm whether that path can occur in practice.
    if len(apps) > 0:
        elbs = aws.get_running_elbs(region, infrastructure_account)
        scaling_groups = aws.get_auto_scaling_groups(region, infrastructure_account)
        rds = aws.get_rds_instances(region, infrastructure_account, entities)
        elasticaches = aws.get_elasticache_nodes(region, infrastructure_account)
        dynamodbs = aws.get_dynamodb_tables(region, infrastructure_account)
        certificates = aws.get_certificates(region, infrastructure_account)
        aws_limits = aws.get_limits(region, infrastructure_account, apps, elbs)
        sqs = aws.get_sqs_queues(region, infrastructure_account, entities)

    account_alias = aws.get_account_alias(region)
    ia_entity = {
        'type': 'local',
        'infrastructure_account': infrastructure_account,
        'account_alias': account_alias,
        'region': region,
        'id': 'aws-ac[{}:{}]'.format(infrastructure_account, region),
        'created_by': 'agent',
    }

    application_entities = aws.get_apps_from_entities(apps, infrastructure_account, region)

    if args.postgresql_user and args.postgresql_pass:
        postgresql_clusters = zmon_client.get_entities({
            'infrastructure_account': infrastructure_account,
            'region': region,
            'type': 'postgresql_cluster'
        })
        postgresql_databases = postgresql.get_databases_from_clusters(
            postgresql_clusters, infrastructure_account, region,
            args.postgresql_user, args.postgresql_pass)
    else:
        # Pretend the list of DBs is empty, but also make sure we don't remove
        # any pre-existing database entities because we don't know about them.
        postgresql_databases = []
        entities = [e for e in entities if e.get('type') != 'postgresql_database']

    current_entities = (elbs + scaling_groups + apps + application_entities +
                        rds + postgresql_databases + elasticaches + dynamodbs +
                        certificates + sqs)
    current_entities.append(aws_limits)
    current_entities.append(ia_entity)

    # 4. Removing missing entities
    existing_ids = get_existing_ids(entities)
    current_entities_ids = {e['id'] for e in current_entities}

    to_be_removed, delete_error_count = remove_missing_entities(
        existing_ids, current_entities_ids, zmon_client, json=args.json)
    # BUG FIX: this log line previously reported len(new_entities) — always 0
    # at this point — instead of the number of entities actually removed.
    logger.info('Found {} removed entities from {} entities ({} failed)'.format(
        len(to_be_removed), len(current_entities), delete_error_count))

    # 5. Get new/updated entities
    new_entities, add_error_count = add_new_entities(
        current_entities, entities, zmon_client, json=args.json)
    logger.info('Found {} new entities from {} entities ({} failed)'.format(
        len(new_entities), len(current_entities), add_error_count))

    # 6. Always add Local entity
    if not args.json:
        ia_entity['errors'] = {'delete_count': delete_error_count, 'add_count': add_error_count}
        try:
            zmon_client.add_entity(ia_entity)
        # BUG FIX: narrowed from a bare "except:" — deliberate best-effort,
        # but it must not swallow SystemExit/KeyboardInterrupt
        except Exception:
            logger.exception('Failed to add Local entity: {}'.format(ia_entity))

    types = {e['type']: len([t for t in new_entities if t['type'] == e['type']])
             for e in new_entities}
    for t, v in types.items():
        logger.info('Found {} new entities of type: {}'.format(v, t))

    # Check if it is a dry run!
    if args.json:
        d = {
            'applications': application_entities,
            'apps': apps,
            'dynamodb': dynamodbs,
            'elbs': elbs,
            'elc': elasticaches,
            'rds': rds,
            'certificates': certificates,
            'aws_limits': aws_limits,
            'sqs_queues': sqs,
            'new_entities': new_entities,
            'to_be_removed': to_be_removed,
        }
        print(json.dumps(d, indent=4))
def main():
    """Entry point for the ZMON Kubernetes agent.

    Parses CLI options (each overridable via ZMON_AGENT_* env variables),
    initializes the OpenTracing tracer, sets up OAuth2 token management,
    auto-detects the AWS region from instance metadata when not given, and
    runs sync() once (or repeatedly, if an interval is configured —
    presumably handled inside sync(); confirm there).
    """
    argp = argparse.ArgumentParser(description='ZMON Kubernetes Agent')
    argp.add_argument(
        '-i',
        '--infrastructure-account',
        dest='infrastructure_account',
        default=None,
        help=
        'Infrastructure account which identifies this agent. Can be set via '
        'ZMON_AGENT_INFRASTRUCTURE_ACCOUNT env variable.')
    argp.add_argument(
        '-r',
        '--region',
        dest='region',
        help='Cluster region. Can be set via ZMON_AGENT_REGION env variable.')
    argp.add_argument(
        '-d',
        '--discover',
        dest='discover',
        help=
        ('Comma separated list of builtin discovery agents to be used. Current supported discovery '
         'agents are {}. Can be set via ZMON_AGENT_BUILTIN_DISCOVERY env variable.'
         ).format(BUILTIN_DISCOVERY))
    argp.add_argument(
        '-e',
        '--entity-service',
        dest='entity_service',
        help=
        'ZMON backend URL. Can be set via ZMON_AGENT_ENTITY_SERVICE_URL env variable.'
    )
    argp.add_argument(
        '--interval',
        dest='interval',
        help=
        'Interval for agent sync. If not set then agent will run once. Can be set via '
        'ZMON_AGENT_INTERVAL env variable.')

    # OPENTRACING SUPPORT
    argp.add_argument(
        '--opentracing',
        dest='opentracing',
        default=os.environ.get('ZMON_AGENT_OPENTRACING_TRACER'),
        help=
        'OpenTracing tracer name as supported by opentracing-utils. Please Ignore for NOOP tracer.'
    )
    argp.add_argument('-j',
                      '--json',
                      dest='json',
                      action='store_true',
                      help='Print JSON output only.',
                      default=False)
    argp.add_argument('--skip-ssl',
                      dest='skip_ssl',
                      action='store_true',
                      default=False)
    argp.add_argument('-v',
                      '--verbose',
                      dest='verbose',
                      action='store_true',
                      default=False,
                      help='Verbose output.')

    args = argp.parse_args()

    logger.info('Initializing opentracing tracer: {}'.format(
        args.opentracing if args.opentracing else 'noop'))
    init_opentracing_tracer(args.opentracing)

    # Give some time for tracer initialization (only when a real tracer is
    # configured; the noop tracer needs no warm-up).
    if args.opentracing:
        time.sleep(2)

    init_span = opentracing.tracer.start_span(operation_name='zmon-agent-init')

    with init_span:
        # Hard requirements: the infrastructure account must come from the
        # CLI flag or the env variable, otherwise we cannot proceed.
        infrastructure_account = (
            args.infrastructure_account if args.infrastructure_account else
            os.environ.get('ZMON_AGENT_INFRASTRUCTURE_ACCOUNT'))
        if not infrastructure_account:
            init_span.set_tag('error', True)
            raise RuntimeError(
                'Cannot determine infrastructure account. Please use --infrastructure-account option or '
                'set env variable ZMON_AGENT_INFRASTRUCTURE_ACCOUNT.')

        # NOTE: for these three settings the env variable takes precedence
        # over the CLI flag (the flag is only the fallback default).
        region = os.environ.get('ZMON_AGENT_REGION', args.region)
        entity_service = os.environ.get('ZMON_AGENT_ENTITY_SERVICE_URL',
                                        args.entity_service)
        interval = os.environ.get('ZMON_AGENT_INTERVAL', args.interval)

        init_span.set_tag('account', infrastructure_account)
        init_span.set_tag('region', region)

        # May raise ValueError on a non-numeric interval — intentional fail-fast.
        if interval:
            interval = int(interval)

        # OAUTH2 tokens
        tokens.configure()
        tokens.manage('uid', ['uid'])

        # NOTE(review): ZMON_AGENT_DEBUG is read as a raw string, so any
        # non-empty value (including "false" or "0") enables debug logging.
        verbose = args.verbose if args.verbose else os.environ.get(
            'ZMON_AGENT_DEBUG', False)
        if verbose:
            logger.setLevel(logging.DEBUG)

        verify = True
        if args.skip_ssl:
            logger.warning('ZMON agent will skip SSL verification!')
            verify = False

        if not region:
            # Assuming running on AWS: query the EC2 instance metadata
            # endpoint for the availability zone and drop the trailing AZ
            # letter (e.g. "eu-central-1a" -> "eu-central-1").
            logger.info('Trying to figure out region ...')
            try:
                response = requests.get(
                    'http://169.254.169.254/latest/meta-data/placement/availability-zone',
                    timeout=2)
                response.raise_for_status()
                region = response.text[:-1]
            except Exception:
                init_span.set_tag('error', True)
                logger.error(
                    'AWS region was not specified and can not be fetched from instance meta-data!'
                )
                raise

        logger.info('Starting sync operations!')

        sync(infrastructure_account, region, entity_service, verify,
             args.json, interval)

    # Give the tracer time to flush pending spans before the process exits.
    if args.opentracing:
        time.sleep(5)
def main():
    """Entry point for the ZMON AWS agent.

    Discovers AWS resources (ELBs, ASGs, elastigroups, RDS, ElastiCache,
    DynamoDB, certificates, SQS, PostgreSQL clusters/databases) for one
    account/region, diffs them against the entities already registered in
    ZMON, removes stale ones and adds new ones. With ``--json`` it performs
    a dry run and prints the discovery result as JSON instead.
    """
    argp = argparse.ArgumentParser(description='ZMON AWS Agent')
    argp.add_argument('-e', '--entity-service', dest='entityservice')
    argp.add_argument('-r', '--region', dest='region', default=None)
    argp.add_argument('-j', '--json', dest='json', action='store_true')
    argp.add_argument('-t', '--tracer', dest='tracer',
                      default=os.environ.get('OPENTRACING_TRACER', 'noop'))
    argp.add_argument('--no-oauth2', dest='disable_oauth2', action='store_true', default=False)
    argp.add_argument('--postgresql-user', dest='postgresql_user',
                      default=os.environ.get('AGENT_POSTGRESQL_USER'))
    argp.add_argument('--postgresql-pass', dest='postgresql_pass',
                      default=os.environ.get('AGENT_POSTGRESQL_PASS'))
    args = argp.parse_args()

    if not args.disable_oauth2:
        tokens.configure()
        tokens.manage('uid', ['uid'])
        tokens.start()

    init_opentracing_tracer(args.tracer)
    root_span = opentracing.tracer.start_span(operation_name='aws_entity_discovery')

    with root_span:
        logging.basicConfig(level=logging.INFO)

        # 0. Fetch extra data for entities: EXTRA_ENTITY_FIELDS is a comma
        # separated list of key=value pairs merged into every entity below.
        entity_extras = {}
        for ex in os.getenv('EXTRA_ENTITY_FIELDS', '').split(','):
            if '=' not in ex:
                continue
            k, v = ex.split('=', 1)
            if k and v:
                entity_extras[k] = v

        # 1. Determine region: fall back to EC2 instance metadata, dropping
        # the trailing AZ letter (e.g. "eu-central-1a" -> "eu-central-1").
        if not args.region:
            logger.info('Trying to figure out region..')
            try:
                response = requests.get('http://169.254.169.254/latest/meta-data/placement/availability-zone',
                                        timeout=2)
            except Exception:
                root_span.set_tag('error', True)
                root_span.log_kv({'exception': traceback.format_exc()})
                # BUGFIX: the two string literals previously concatenated
                # without a separating space ("...andcan not...").
                logger.exception('Region was not specified as a parameter and '
                                 'can not be fetched from instance meta-data!')
                raise
            # NOTE(review): no raise_for_status() here — a non-200 metadata
            # response would yield a bogus region string; confirm intent.
            region = response.text[:-1]
        else:
            region = args.region

        root_span.set_tag('region', region)

        logger.info('Using region: {}'.format(region))

        logger.info('Entity service URL: %s', args.entityservice)

        logger.info('Reading DNS data for hosted zones')
        aws.populate_dns_data()

        aws_account_id = aws.get_account_id(region)
        infrastructure_account = 'aws:{}'.format(aws_account_id) if aws_account_id else None

        if not infrastructure_account:
            logger.error('AWS agent: Cannot determine infrastructure account ID. Terminating!')
            return

        root_span.set_tag('account', infrastructure_account)

        # 2. ZMON entities
        # BUGFIX: previously `token` was only bound inside the oauth2 branch,
        # so running with --no-oauth2 crashed with NameError below. Read the
        # explicit env token unconditionally; oauth2 only supplies a fallback.
        token = os.getenv('ZMON_TOKEN', None)
        if not args.disable_oauth2:
            token = token or tokens.get('uid')
        zmon_client = Zmon(args.entityservice, token=token, user_agent=get_user_agent())

        query = {'infrastructure_account': infrastructure_account, 'region': region, 'created_by': 'agent'}
        entities = zmon_client.get_entities(query)

        # 3. Get running apps
        apps = aws.get_running_apps(region, entities)

        elbs = []
        scaling_groups = []
        elastigroups = []
        certificates = []
        rds = []
        elasticaches = []
        dynamodbs = []
        sqs = []
        postgresql_clusters = []
        aws_limits = []

        new_entities = []
        to_be_removed = []

        # Only bother discovering supporting infrastructure when at least
        # one application instance is running in the account.
        if len(apps) > 0:
            elbs = aws.get_running_elbs(region, infrastructure_account)
            scaling_groups = aws.get_auto_scaling_groups(region, infrastructure_account)
            elastigroups = elastigroup.get_elastigroup_entities(region, infrastructure_account)
            rds = aws.get_rds_instances(region, infrastructure_account, entities)
            elasticaches = aws.get_elasticache_nodes(region, infrastructure_account)
            dynamodbs = aws.get_dynamodb_tables(region, infrastructure_account)
            certificates = aws.get_certificates(region, infrastructure_account)
            aws_limits = aws.get_limits(region, infrastructure_account, apps, elbs, entities)
            sqs = aws.get_sqs_queues(region, infrastructure_account, entities)
            postgresql_clusters = postgresql.get_postgresql_clusters(region, infrastructure_account,
                                                                     scaling_groups, apps)

        account_alias = aws.get_account_alias(region)
        # "Local" entity representing this account/region itself.
        ia_entity = {
            'type': 'local',
            'infrastructure_account': infrastructure_account,
            'account_alias': account_alias,
            'region': region,
            'id': 'aws-ac[{}:{}]'.format(infrastructure_account, region),
            'created_by': 'agent',
        }

        # Derive the owning team from the account alias, stripping an
        # optional common prefix (e.g. organisation name).
        account_alias_prefix = os.getenv('ACCOUNT_ALIAS_PREFIX', None)
        owner = account_alias
        if account_alias_prefix:
            owner = owner.replace(account_alias_prefix, '', 1)
        root_span.set_tag('team', owner)

        application_entities = aws.get_apps_from_entities(apps, infrastructure_account, region)

        if args.postgresql_user and args.postgresql_pass:
            postgresql_databases = postgresql.get_databases_from_clusters(postgresql_clusters,
                                                                          infrastructure_account,
                                                                          region,
                                                                          args.postgresql_user,
                                                                          args.postgresql_pass)
        else:
            # Pretend the list of DBs is empty, but also make sure we don't
            # remove any pre-existing database entities because we don't
            # know about them.
            postgresql_databases = []
            entities = [e for e in entities if e.get('type') != 'postgresql_database']

        current_entities = (
            elbs + scaling_groups + elastigroups + apps + application_entities + rds +
            postgresql_databases + postgresql_clusters + elasticaches + dynamodbs + certificates + sqs)
        current_entities.append(aws_limits)
        current_entities.append(ia_entity)

        for entity in current_entities:
            entity.update(entity_extras)

        # 4. Removing missing entities
        existing_ids = get_existing_ids(entities)
        current_entities_ids = {e['id'] for e in current_entities}

        to_be_removed, delete_error_count = remove_missing_entities(
            existing_ids, current_entities_ids, zmon_client, json=args.json)

        root_span.log_kv({'total_entitites': str(len(current_entities))})
        root_span.log_kv({'removed_entities': str(len(to_be_removed))})
        logger.info('Found {} removed entities from {} entities ({} failed)'.format(
            len(to_be_removed), len(current_entities), delete_error_count))

        # 5. Get new/updated entities
        new_entities, add_error_count = add_new_entities(current_entities, entities, zmon_client, json=args.json)
        root_span.log_kv({'new_entities': str(len(new_entities))})
        logger.info('Found {} new entities from {} entities ({} failed)'.format(
            len(new_entities), len(current_entities), add_error_count))

        # 6. Always add Local entity (carries the error counters of this run).
        if not args.json:
            ia_entity['errors'] = {'delete_count': delete_error_count, 'add_count': add_error_count}
            update_local_entity(zmon_client, ia_entity)

        types = {e['type']: len([t for t in new_entities if t['type'] == e['type']]) for e in new_entities}

        for t, v in types.items():
            logger.info('Found {} new entities of type: {}'.format(v, t))

        # Check if it is a dry run!
        if args.json:
            d = {
                'applications': application_entities,
                'apps': apps,
                'elastigroups': elastigroups,
                'dynamodb': dynamodbs,
                'elbs': elbs,
                'elc': elasticaches,
                'rds': rds,
                'certificates': certificates,
                'aws_limits': aws_limits,
                'sqs_queues': sqs,
                'new_entities': new_entities,
                'to_be_removed': to_be_removed,
                # NOTE(review): key typo ("posgresql") kept on purpose —
                # downstream consumers of the dry-run JSON may depend on it.
                'posgresql_clusters': postgresql_clusters
            }

            print(json.dumps(d, indent=4))
from pathlib import Path
from urllib.parse import urljoin

import kubernetes.client
import kubernetes.config
import requests
import tokens
from requests.auth import AuthBase

# default URL points to kubectl proxy
DEFAULT_CLUSTERS = "http://localhost:8001/"
# Characters not allowed in a cluster ID; everything else is replaced by "-".
# NOTE(review): `re` and `logging` are presumably imported earlier in this
# file — they are used below but not imported in this section.
CLUSTER_ID_INVALID_CHARS = re.compile("[^a-z0-9:-]")

logger = logging.getLogger(__name__)

# Module-level side effect: restrict the tokens library to file-based
# credentials for this discovery module.
tokens.configure(from_file_only=True)


def generate_cluster_id(url: str) -> str:
    """Generate some "cluster ID" from given API server URL"""
    # Strip the scheme, then lowercase and sanitize the remainder so the
    # result contains only [a-z0-9:-] with no leading/trailing dashes.
    for prefix in ("https://", "http://"):
        if url.startswith(prefix):
            url = url[len(prefix):]
    return CLUSTER_ID_INVALID_CHARS.sub("-", url.lower()).strip("-")


class StaticAuthorizationHeaderAuth(AuthBase):
    """Static authentication with given "Authorization" header"""

    def __init__(self, authorization):
        # authorization: complete header value (e.g. "Bearer <token>" —
        # presumably; confirm with callers).
        self.authorization = authorization
from pathlib import Path
from urllib.parse import urljoin

import kubernetes.client
import kubernetes.config
import requests
import tokens
from requests.auth import AuthBase

# default URL points to kubectl proxy
DEFAULT_CLUSTERS = 'http://localhost:8001/'
# Characters not allowed in a cluster ID; everything else is replaced by '-'.
# NOTE(review): `re` and `logging` are presumably imported earlier in this
# file — they are used below but not imported in this section.
CLUSTER_ID_INVALID_CHARS = re.compile('[^a-z0-9:-]')

logger = logging.getLogger(__name__)

# Module-level side effect: restrict the tokens library to file-based
# credentials for this discovery module.
tokens.configure(from_file_only=True)


def generate_cluster_id(url: str) -> str:
    '''Generate some "cluster ID" from given API server URL'''
    # Strip the scheme, then lowercase and sanitize the remainder so the
    # result contains only [a-z0-9:-] with no leading/trailing dashes.
    for prefix in ('https://', 'http://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    return CLUSTER_ID_INVALID_CHARS.sub('-', url.lower()).strip('-')


class StaticAuthorizationHeaderAuth(AuthBase):
    '''Static authentication with given "Authorization" header'''

    def __init__(self, authorization):
        # authorization: complete header value (e.g. "Bearer <token>" —
        # presumably; confirm with callers).
        self.authorization = authorization