def __init__(self, storage_id, queue_name, policy_uri,
             log_group=None, metrics=None, output_dir=None):
    """Set up sessions, storage, event subscription and the job scheduler.

    :param storage_id: resource id of the storage account backing the event
        queue; its subscription id is extracted with ``ResourceIdParser``
    :param queue_name: name of the event queue; when falsy the session's
        subscription id is used instead
    :param policy_uri: stored as ``policy_storage_uri``
    :param log_group: forwarded to ``Host.build_options``
    :param metrics: forwarded to ``Host.build_options``
    :param output_dir: forwarded to ``Host.build_options``
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    log.info("Running Azure Cloud Custodian Self-Host")

    resources.load_available()

    # A separate storage session is only created when the storage account
    # belongs to a different subscription than the default session.
    self.session = local_session(Session)
    storage_subscription_id = ResourceIdParser.get_subscription_id(storage_id)
    if storage_subscription_id == self.session.subscription_id:
        self.storage_session = self.session
    else:
        self.storage_session = Session(subscription_id=storage_subscription_id)

    # Configuration
    self.options = Host.build_options(output_dir, log_group, metrics)
    self.policy_storage_uri = policy_uri
    self.event_queue_id = storage_id
    # The event queue name falls back to the subscription id
    self.event_queue_name = queue_name or self.session.subscription_id

    # Storage state; the attribute order is kept as-is because the calls
    # below may read attributes assigned above them.
    self.policy_blob_client = None
    self.blob_cache = {}
    self.queue_storage_account = self.prepare_queue_storage(
        self.event_queue_id, self.event_queue_name)
    self.queue_service = None

    # Event subscription registration
    self.update_event_subscription()

    # Local policy cache directory and loaded-policy dictionary
    self.policy_cache = tempfile.mkdtemp()
    self.policies = {}

    # Scheduler, with apscheduler logging limited to errors
    self.scheduler = BlockingScheduler(Host.get_scheduler_config())
    for noisy_logger in ('apscheduler.executors.default', 'apscheduler'):
        logging.getLogger(noisy_logger).setLevel(logging.ERROR)

    # Recurring policy refresh, first run scheduled for "now"
    self.scheduler.add_job(
        self.update_policies,
        'interval',
        seconds=policy_update_seconds,
        id="update_policies",
        next_run_time=datetime.now(),
        executor='threadpool')

    # Recurring queue polling
    self.scheduler.add_job(
        self.poll_queue,
        'interval',
        seconds=queue_poll_seconds,
        id="poll_queue",
        executor='threadpool')

    # NOTE(review): with a BlockingScheduler this call is expected not to
    # return until the scheduler is shut down - confirm against APScheduler.
    self.scheduler.start()
def stream(repo_uri, stream_uri, verbose, assume, sort,
           before=None, after=None, policy_pattern=()):
    """Stream git history policy changes to destination.

    Default stream destination is a summary of the policy changes to stdout, one
    per line. Also supported for stdout streaming is `jsonline`.

    AWS Kinesis and SQS destinations are specified by providing the ARN.

    Database destinations are supported by providing a sqlalchemy DSN. Note
    SQLAlchemy and db drivers must be installed separately as they are an
    optional dependency.

    When using database destinations, streaming defaults to incremental.
    """
    log_level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s",
        level=log_level)
    logging.getLogger('botocore').setLevel(logging.WARNING)

    # Normalize the optional date bounds
    if before:
        before = parse(before)
    if after:
        after = parse(after)

    # NOTE(review): the combined sort flags are computed here but are not
    # referenced again in this function.
    if sort:
        sort = reduce(operator.or_, [SORT_TYPE[s] for s in sort])

    matcher = (
        partial(policy_path_matcher, patterns=policy_pattern)
        if policy_pattern else None)

    with contextlib.closing(TempDir().open()) as temp_dir:
        if repo_uri is None:
            repo_uri = pygit2.discover_repository(os.getcwd())
            log.debug("Using repository %s", repo_uri)

        # Remote repositories are cloned into the temporary directory,
        # anything else is opened in place.
        if repo_uri.startswith(('http', 'git@')):
            log.info("Cloning repository: %s", repo_uri)
            repo = pygit2.clone_repository(repo_uri, temp_dir.path)
        else:
            repo = pygit2.Repository(repo_uri)

        load_available()
        policy_repo = PolicyRepo(repo_uri, repo, matcher)
        change_count = 0

        with contextlib.closing(transport(stream_uri, assume)) as sink:
            # Indexed transports resume from their last recorded position
            # when no explicit lower bound was given.
            if after is None and isinstance(sink, IndexedTransport):
                after = sink.last()
            for change in policy_repo.delta_stream(after=after, before=before):
                change_count += 1
                sink.send(change)

        log.info("Streamed %d policy repo changes", change_count)
    return change_count
def test_iam_permissions_validity(self):
    """Validate resource, action and filter permissions against known IAM actions.

    Every registered resource is instantiated with a stub policy. Resources
    whose service (or ``permission_prefix``) is absent from the loaded
    ``iam-actions.json`` data are collected as missing and skipped; for the
    rest, the permissions reported by the manager, each action and each
    filter are passed through ``self.check_permissions``.

    Raises ``ValueError`` when any resource is missing its service, and
    otherwise when any invalid permission was reported.
    """
    cfg = Config.empty()
    missing = set()
    invalid = []
    perms = load_data('iam-actions.json')
    resources.load_available()

    # Boolean block filters are skipped when walking the filter registry.
    skipped_filters = ('or', 'and', 'not', 'missing')

    for rname, rclass in manager.resources.items():
        policy = Bag({
            'name': 'permcheck',
            'resource': rname,
            'provider_name': 'aws'
        })
        ctx = self.get_context(config=cfg, policy=policy)
        mgr = rclass(ctx, policy)

        prefixes = (
            mgr.resource_type.service,
            getattr(mgr.resource_type, 'permission_prefix', None))
        if not any(prefix in perms for prefix in prefixes):
            missing.add("%s->%s" % (rname, mgr.resource_type.service))
            continue

        invalid.extend(
            self.check_permissions(perms, mgr.get_permissions(), rname))

        for aname, action_cls in rclass.action_registry.items():
            policy['actions'] = [aname]
            label = "{k}.actions.{n}".format(k=rname, n=aname)
            action_perms = action_cls({}, mgr).get_permissions()
            invalid.extend(self.check_permissions(perms, action_perms, label))

        for fname, filter_cls in rclass.filter_registry.items():
            if fname in skipped_filters:
                continue
            policy['filters'] = [fname]
            label = "{k}.filters.{n}".format(k=rname, n=fname)
            filter_perms = filter_cls({}, mgr).get_permissions()
            invalid.extend(self.check_permissions(perms, filter_perms, label))

    if missing:
        raise ValueError(
            "resources missing service %s" % ('\n'.join(sorted(missing))))

    if invalid:
        raise ValueError(
            "invalid permissions \n %s" % ('\n'.join(sorted(map(str, invalid)))))
def report_account(account, region, policies_config, output_path, cache_path, debug):
    """Collect the policy output records of one account/region pair.

    Records are read from S3 (looking back one day) when a policy's output
    type is ``"s3"``, otherwise from the policy's local log directory. Each
    record is annotated with the policy name, region, account name and any
    ``key:value`` account tags.

    :param account: account mapping; ``name`` and ``account_id`` are read,
        ``role``, ``external_id``, ``profile`` and ``tags`` are optional
    :param region: region the report is built for
    :param policies_config: policy data handed to ``PolicyCollection.from_data``
    :param output_path: base output directory; account name and region are appended
    :param cache_path: base cache directory
    :param debug: not used by this function
    :return: list of annotated records across all policies
    """
    account_name = account['name']
    output_path = os.path.join(output_path, account_name, region)
    cache_path = os.path.join(
        cache_path, "%s-%s.cache" % (account_name, region))

    load_available()
    config = Config.empty(
        region=region,
        output_dir=output_path,
        account_id=account['account_id'],
        metrics_enabled=False,
        cache=cache_path,
        log_group=None,
        profile=None,
        external_id=None)

    # Credentials: an assumed role takes precedence over a named profile.
    if account.get('role'):
        config['assume_role'] = account['role']
        config['external_id'] = account.get('external_id')
    elif account.get('profile'):
        config['profile'] = account['profile']

    def annotate(record, policy):
        # Stamp a record with its origin plus any "key:value" account tags.
        record['policy'] = policy.name
        record['region'] = policy.options.region
        record['account'] = account_name
        for tag in account.get('tags', ()):
            key, sep, value = tag.partition(':')
            if sep:
                record[key] = value

    records = []
    for policy in PolicyCollection.from_data(policies_config, config):
        # initialize policy execution context for output access
        policy.ctx.initialize()
        log.debug(
            "Report policy:%s account:%s region:%s path:%s",
            policy.name, account_name, region, output_path)

        if policy.ctx.output.type == "s3":
            begin_date = datetime.now() - timedelta(days=1)
            output_config = policy.ctx.output.config
            policy_records = record_set(
                policy.session_factory,
                output_config['netloc'],
                strip_output_path(output_config['path'], policy.name),
                begin_date)
        else:
            policy_records = fs_record_set(policy.ctx.log_dir, policy.name)

        for record in policy_records:
            annotate(record, policy)

        records.extend(policy_records)

    return records
def schema_completer(prefix):
    """Return tab-completion options for argcomplete.

    For the given prefix so far, return the possible options. Note that
    filtering via startswith happens after this list is returned.

    The prefix is split on ``.``. A leading provider name is consumed as
    the cloud provider; otherwise ``aws`` is assumed and the first
    component is rewritten as ``aws.<component>``.

    :param prefix: the text typed so far
    :return: list of candidate completion strings (possibly empty)
    """
    from c7n import schema
    load_available()
    components = prefix.split('.')

    if components[0] in provider.clouds.keys():
        cloud_provider = components.pop(0)
        provider_resources = provider.resources(cloud_provider)
        if not components:
            # The prefix was a bare provider name (e.g. "aws"): nothing is
            # left to complete, and indexing components[0] below would
            # raise IndexError.
            return []
    else:
        cloud_provider = 'aws'
        provider_resources = provider.resources('aws')
        components[0] = "aws.%s" % components[0]

    # Completions for resource
    if len(components) == 1:
        choices = [
            r for r in provider.resources().keys()
            if r.startswith(components[0])
        ]
        # A unique match also offers the dotted form to keep completing
        if len(choices) == 1:
            choices += ['{}{}'.format(choices[0], '.')]
        return choices

    if components[0] not in provider_resources.keys():
        return []

    # Completions for category
    if len(components) == 2:
        choices = [
            '{}.{}'.format(components[0], x)
            for x in ('actions', 'filters')
            if x.startswith(components[1])
        ]
        if len(choices) == 1:
            choices += ['{}{}'.format(choices[0], '.')]
        return choices

    # Completions for item
    elif len(components) == 3:
        resource_mapping = schema.resource_vocabulary(cloud_provider)
        return [
            '{}.{}.{}'.format(components[0], components[1], x)
            for x in resource_mapping[components[0]][components[1]]
        ]

    return []
def init(config, use, debug, verbose, accounts, tags, policies,
         resource=None, policy_tags=()):
    """Configure logging, load and filter the configs, and pick an executor.

    :param config: path of the accounts config file (validated against
        ``CONFIG_SCHEMA``)
    :param use: optional path of the custodian policy file
    :param debug: when truthy, ``MainThreadExecutor`` is returned instead of
        ``ProcessPoolExecutor``
    :param verbose: when truthy, logging is set to DEBUG instead of INFO
    :param accounts: account selection, comma-expanded
    :param tags: account tag selection, comma-expanded
    :param policies: policy selection, comma-expanded
    :param resource: optional resource passed to ``filter_policies``
    :param policy_tags: policy tag selection, comma-expanded
    :return: tuple ``(accounts_config, custodian_config, executor)``
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s")
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Quieten chatty third-party / internal loggers
    quiet_levels = (
        ('botocore', logging.ERROR),
        ('s3transfer', logging.WARNING),
        ('custodian.s3', logging.ERROR),
        ('urllib3', logging.WARNING),
    )
    for logger_name, logger_level in quiet_levels:
        logging.getLogger(logger_name).setLevel(logger_level)

    accounts = comma_expand(accounts)
    policies = comma_expand(policies)
    tags = comma_expand(tags)
    policy_tags = comma_expand(policy_tags)

    # Filter out custodian log messages on console output if not
    # at warning level or higher, see LogFilter docs and #2674
    for handler in logging.getLogger().handlers:
        if isinstance(handler, logging.StreamHandler):
            handler.addFilter(LogFilter())

    with open(config, 'rb') as fh:
        accounts_config = yaml.safe_load(fh.read())
        jsonschema.validate(accounts_config, CONFIG_SCHEMA)

    custodian_config = {}
    if use:
        with open(use) as fh:
            custodian_config = yaml.safe_load(fh.read())

    accounts_config['accounts'] = list(accounts_iterator(accounts_config))
    filter_policies(custodian_config, policy_tags, policies, resource)
    filter_accounts(accounts_config, tags, accounts)

    load_available()
    MainThreadExecutor.c7n_async = False
    executor = MainThreadExecutor if debug else ProcessPoolExecutor
    return accounts_config, custodian_config, executor
def diff(repo_uri, source, target, output, verbose):
    """Policy diff between two arbitrary revisions.

    Revision specifiers for source and target can use fancy git refspec syntax
    for symbolics, dates, etc.

    See: https://git-scm.com/book/en/v2/Git-Tools-Revision-Selection

    Default revision selection is dependent on current working tree
    branch. The intent is for two use cases, if on a non-master branch
    then show the diff to master. If on master show the diff to
    previous commit on master. For repositories not using the
    `master` convention, please specify explicit source and target.
    """
    log_level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s",
        level=log_level)
    logging.getLogger('botocore').setLevel(logging.WARNING)

    if repo_uri is None:
        repo_uri = pygit2.discover_repository(os.getcwd())

    repo = pygit2.Repository(repo_uri)
    load_available()

    current_branch = repo.head.shorthand
    # If on master show diff between last commit to current head
    if current_branch == 'master':
        if source is None:
            source = 'HEAD^1'
        if target is None:
            target = 'master'
    # Else show difference between master and current head
    elif target is None:
        target = current_branch
    if source is None:
        source = 'master'

    policy_repo = PolicyRepo(repo_uri, repo)
    source_rev = repo.revparse_single(source)
    target_rev = repo.revparse_single(target)
    changes = list(policy_repo.delta_commits(source_rev, target_rev))

    # Removed policies are left out of the emitted document
    surviving = [
        change.policy.data
        for change in changes
        if change.kind != ChangeType.REMOVE
    ]
    document = yaml.safe_dump({'policies': surviving})
    output.write(document.encode('utf8'))
def _get_resource_id(self, resource, policy): """ Obtain the id for a given policy from a dict of resources. :param resource: the dict of resources :param policy: the name of the policy :return: the resource_id the policy affects :rtype: string """ resource_type = resource.get(policy, {}).get(self.RESOURCE_TYPE_KEY, self.UNKNOWN_RESOURCE_TYPE) if resource_type == self.UNKNOWN_RESOURCE_TYPE: return load_available() _id = self.UNKNOWN_RESOURCE_ID try: _id = get_resource_class(resource_type) \ .resource_type() \ .id except Exception: logger.warning('unable to get resource_id for %s - id: %s', policy, _id) return _id
def version_cmd(options):
    """Print the custodian version, plus environment details in debug mode.

    Without ``options.debug`` only the version is printed. With it, the
    Python version, platform, virtualenv/docker detection and the generated
    requirements for the installed custodian packages are printed as well.
    """
    from c7n.version import version
    from c7n.resources import load_available
    from c7n.mu import generate_requirements

    if not options.debug:
        print(version)
        return

    indent = 13

    print(
        "\nPlease copy/paste the following info along with any bug reports:\n")
    print("Custodian: ", version)

    # Continuation lines of the version string are indented for readability
    continuation = '\n' + ' ' * indent
    print("Python: ", sys.version.replace('\n', continuation))

    # os.uname is only available on recent versions of Unix
    try:
        print("Platform: ", os.uname())
    except Exception:  # pragma: no cover
        print("Platform: ", sys.platform)

    is_venv = (
        hasattr(sys, 'real_prefix') or
        getattr(sys, 'base_prefix', sys.prefix) != sys.prefix)
    print("Using venv: ", is_venv)

    in_container = os.path.exists('/.dockerenv')
    print("Docker: %s" % str(bool(in_container)))
    print("Installed: \n")

    # Provider name -> distribution reported when that provider was found
    provider_packages = (
        ('gcp', 'c7n_gcp'),
        ('azure', 'c7n_azure'),
        ('k8s', 'c7n_kube'),
        ('openstack', 'c7n_openstack'),
    )
    found = load_available(resources=False)
    packages = ['c7n']
    packages.extend(
        package for name, package in provider_packages if name in found)
    print(generate_requirements(packages))
def schema_cmd(options):
    """Print info about the resources, actions and filters available.

    ``options.json`` dumps the JSON schema and ``options.summary`` prints a
    schema summary; otherwise ``options.resource`` is interpreted as a
    dotted selector (see the format list in the body). Invalid selectors
    are logged and terminate the process via ``sys.exit(1)``.
    """
    from c7n import schema

    if options.json:
        schema.json_dump(options.resource)
        return

    if options.summary:
        load_available()
        resource_mapping = schema.resource_vocabulary()
        schema.pprint_schema_summary(resource_mapping)
        return

    # Here are the formats for what we accept:
    # - No argument
    #   - List all available RESOURCES
    # - PROVIDER
    #   - List all available RESOURCES for supplied PROVIDER
    # - RESOURCE
    #   - List all available actions and filters for supplied RESOURCE
    # - MODE
    #   - List all available MODES
    # - RESOURCE.actions
    #   - List all available actions for supplied RESOURCE
    # - RESOURCE.actions.ACTION
    #   - Show class doc string and schema for supplied action
    # - RESOURCE.filters
    #   - List all available filters for supplied RESOURCE
    # - RESOURCE.filters.FILTER
    #   - Show class doc string and schema for supplied filter

    if not options.resource:
        load_available(resources=False)
        resource_list = {'resources': sorted(itertools.chain(
            *[clouds[p].resource_map.keys() for p in PROVIDER_NAMES]))}
        print(yaml_dump(resource_list))
        return

    # Format is [PROVIDER].RESOURCE.CATEGORY.ITEM
    # optional provider defaults to aws for compatibility
    components = options.resource.lower().split('.')

    if len(components) == 1 and components[0] in PROVIDER_NAMES:
        # The trailing comma matters: without it the argument is a plain
        # string rather than a one-element tuple of provider names.
        load_providers((components[0],))
        resource_list = {'resources': sorted(
            clouds[components[0]].resource_map.keys())}
        print(yaml_dump(resource_list))
        return

    if components[0] in PROVIDER_NAMES:
        cloud_provider = components.pop(0)
        components[0] = '%s.%s' % (cloud_provider, components[0])
        load_resources((components[0],))
        resource_mapping = schema.resource_vocabulary(
            cloud_provider)
    elif components[0] == 'mode':
        load_available(resources=False)
        resource_mapping = schema.resource_vocabulary()
    else:  # compatibility, aws is default for provider
        components[0] = 'aws.%s' % components[0]
        load_resources((components[0],))
        resource_mapping = schema.resource_vocabulary('aws')

    #
    # Handle mode
    #
    if components[0] == "mode":
        if len(components) == 1:
            output = {components[0]: list(resource_mapping[components[0]].keys())}
            print(yaml_dump(output))
            return

        if len(components) == 2:
            if components[1] not in resource_mapping[components[0]]:
                log.error('{} is not a valid mode'.format(components[1]))
                sys.exit(1)

            _print_cls_schema(resource_mapping[components[0]][components[1]])
            return

        # We received too much (e.g. mode.actions.foo)
        log.error("Invalid selector '{}'. Valid options are 'mode' "
                  "or 'mode.TYPE'".format(options.resource))
        sys.exit(1)

    #
    # Handle resource
    #
    resource = components[0]
    if resource not in resource_mapping:
        log.error('{} is not a valid resource'.format(resource))
        sys.exit(1)

    if len(components) == 1:
        # Read the docstring before dropping the class references, which
        # are removed from the mapping ahead of the YAML dump.
        docstring = ElementSchema.doc(
            resource_mapping[resource]['classes']['resource'])
        del resource_mapping[resource]['classes']
        if docstring:
            print("\nHelp\n----\n")
            print(docstring + '\n')
        output = {resource: resource_mapping[resource]}
        print(yaml_dump(output))
        return

    #
    # Handle category
    #
    category = components[1]
    if category not in ('actions', 'filters'):
        log.error("Valid choices are 'actions' and 'filters'. You supplied '{}'".format(category))
        sys.exit(1)

    if len(components) == 2:
        output = "No {} available for resource {}.".format(category, resource)
        if category in resource_mapping[resource]:
            output = {resource: {
                category: resource_mapping[resource][category]}}
        print(yaml_dump(output))
        return

    #
    # Handle item
    #
    item = components[2]
    if item not in resource_mapping[resource][category]:
        log.error('{} is not in the {} list for resource {}'.format(item, category, resource))
        sys.exit(1)

    if len(components) == 3:
        cls = resource_mapping[resource]['classes'][category][item]
        _print_cls_schema(cls)
        return

    # We received too much (e.g. s3.actions.foo.bar)
    log.error("Invalid selector '{}'. Max of 3 components in the "
              "format RESOURCE.CATEGORY.ITEM".format(options.resource))
    sys.exit(1)
def run_account(account, region, policies_config, output_path,
                cache_period, cache_path, metrics, dryrun, debug):
    """Execute a set of policies on an account.

    :param account: account mapping; ``name`` and ``account_id`` are read,
        ``role``, ``external_id``, ``profile`` and ``vars`` are optional
    :param region: region to run in; policies pinned to another region are skipped
    :param policies_config: policy data handed to ``PolicyCollection.from_data``
    :param output_path: output location; account name and region are appended
        unless the path contains ``{`` (interpolated by the user)
    :param cache_period: forwarded to the run configuration
    :param cache_path: base cache directory
    :param metrics: forwarded as ``metrics_enabled``
    :param dryrun: forwarded to the run configuration
    :param debug: when truthy, unexpected exceptions drop into ``pdb`` and are re-raised
    :return: tuple ``(policy_counts, success)`` where ``policy_counts`` maps
        policy name to the number of resources returned by its run
    """
    logging.getLogger('custodian.output').setLevel(logging.ERROR + 1)
    CONN_CACHE.session = None
    CONN_CACHE.time = None

    load_available()

    # allow users to specify interpolated output paths
    if '{' not in output_path:
        output_path = os.path.join(output_path, account['name'], region)

    cache_path = os.path.join(
        cache_path, "%s-%s.cache" % (account['account_id'], region))

    config = Config.empty(
        region=region,
        cache=cache_path,
        cache_period=cache_period,
        dryrun=dryrun,
        output_dir=output_path,
        account_id=account['account_id'],
        metrics_enabled=metrics,
        log_group=None,
        profile=None,
        external_id=None)

    env_vars = account_tags(account)

    # A string role is assumed through the config; any other role value is
    # resolved to credentials up front and exported through the environment.
    if account.get('role'):
        if isinstance(account['role'], six.string_types):
            config['assume_role'] = account['role']
            config['external_id'] = account.get('external_id')
        else:
            session = get_session(account, 'custodian', region)
            env_vars.update(_get_env_creds(session, region))
    elif account.get('profile'):
        config['profile'] = account['profile']

    policies = PolicyCollection.from_data(policies_config, config)
    policy_counts = {}
    success = True
    started = time.time()

    with environ(**env_vars):
        for policy in policies:
            # Variable expansion and non schema validation (not optional)
            policy.expand_variables(
                policy.get_variables(account.get('vars', {})))
            policy.validate()

            if policy.region and policy.region != region:
                continue

            log.debug(
                "Running policy:%s account:%s region:%s",
                policy.name, account['name'], region)
            try:
                resources = policy.run()
                policy_counts[policy.name] = len(resources) if resources else 0
                if not resources:
                    continue
                if not config.dryrun and policy.execution_mode != 'pull':
                    log.info(
                        "Ran account:%s region:%s policy:%s provisioned time:%0.2f",
                        account['name'], region, policy.name,
                        time.time() - started)
                    continue
                log.info(
                    "Ran account:%s region:%s policy:%s matched:%d time:%0.2f",
                    account['name'], region, policy.name, len(resources),
                    time.time() - started)
            except ClientError as e:
                success = False
                # Access denied aborts the remaining policies for this account
                if e.response['Error']['Code'] == 'AccessDenied':
                    log.warning(
                        'Access denied api:%s policy:%s account:%s region:%s',
                        e.operation_name, policy.name, account['name'], region)
                    return policy_counts, success
                log.error(
                    "Exception running policy:%s account:%s region:%s error:%s",
                    policy.name, account['name'], region, e)
                continue
            except Exception as e:
                success = False
                log.error(
                    "Exception running policy:%s account:%s region:%s error:%s",
                    policy.name, account['name'], region, e)
                if not debug:
                    continue
                # Debug mode: show the traceback, open a post-mortem
                # debugger, then propagate the error.
                import traceback
                import pdb
                import sys
                traceback.print_exc()
                pdb.post_mortem(sys.exc_info()[-1])
                raise

    return policy_counts, success
def setUp(self):
    """Load every available resource before each test.

    The linting meta tests need all resources to be loaded.
    """
    load_available()
def json_dump(resource=None):
    """Print the generated schema as JSON indented by two spaces.

    :param resource: optional value forwarded to ``generate``
    """
    load_available()
    generated = generate(resource)
    rendered = json.dumps(generated, indent=2)
    print(rendered)
def main(path, output, since, end, user):
    """Group commit subjects by category and write them out as a changelog.

    Commits are walked from the repository head. Walking stops at the first
    commit dated at or before ``since``; commits dated at or after ``end``,
    or by authors not in ``user``, are skipped. The category is the text
    before the first ``-`` of the commit message (``other`` when there is
    none), lower-cased, cut at the first ``.`` and ``/``, and mapped through
    ``aliases``. Categories starting with an entry of ``skip`` are dropped.

    When ``since`` contains more than two dots and no ``end`` is given, a
    schema diff against that version is appended to the output.

    :param path: path of the git repository
    :param output: path of the file to write
    :param since: optional lower bound reference
    :param end: optional upper bound reference
    :param user: optional collection of author names to keep
    """
    repo = pygit2.Repository(path)
    if since:
        since_dateref = resolve_dateref(since, repo)
    if end:
        end_dateref = resolve_dateref(end, repo)

    groups = {}
    count = 0
    for commit in repo.walk(repo.head.target):
        cdate = commit_date(commit)
        if since and cdate <= since_dateref:
            break
        if end and cdate >= end_dateref:
            continue
        if user and commit.author.name not in user:
            continue

        # Category is whatever precedes the first dash of the message
        prefix, dash, _ = commit.message.strip().partition('-')
        if dash:
            category = prefix
        else:
            print("bad commit %s %s" % (cdate, commit.message))
            category = 'other'

        category = category.strip().lower()
        category = category.split('.', 1)[0].split('/', 1)[0]
        category = aliases.get(category, category)

        # Only the subject line of the message is kept
        message = commit.message.strip().split('\n')[0]

        if any(category.startswith(s) for s in skip):
            continue

        if user:
            message = "%s - %s - %s" % (
                cdate.strftime("%Y/%m/%d"), commit.author.name, message)
        groups.setdefault(category, []).append(message)
        count += 1

    import pprint
    print('total commits %d' % count)
    pprint.pprint({k: len(v) for k, v in groups.items()})

    diff_md = ""
    if since and not end and since.count('.') > 2:
        schema_old = schema_outline_from_docker(since)
        load_available()
        schema_new = resource_outline()
        diff_md = schema_diff(schema_old, schema_new)

    with open(output, 'w') as fh:
        for category in sorted(groups):
            if category in skip:
                continue
            print("# %s" % category, file=fh)
            for entry in sorted(groups[category]):
                print(" - %s" % entry.strip(), file=fh)
            print("\n", file=fh)
        if diff_md.strip():
            print("# schema changes", file=fh)
            print(diff_md, file=fh)