Exemple #1
0
    def __init__(self, storage_id, queue_name, policy_uri,
                 log_group=None, metrics=None, output_dir=None):
        """Set up the self-hosted runner and start its scheduler.

        :param storage_id: resource id of the storage used for the event
            queue; its subscription id is parsed out to pick the session.
        :param queue_name: event queue name; when falsy the session's
            subscription id is used instead.
        :param policy_uri: stored as ``self.policy_storage_uri``.
        :param log_group: forwarded to ``Host.build_options``.
        :param metrics: forwarded to ``Host.build_options``.
        :param output_dir: forwarded to ``Host.build_options``.

        NOTE(review): ``policy_update_seconds`` and ``queue_poll_seconds``
        are read below but are not defined in this method; they are
        expected to come from an enclosing scope not visible here.
        """
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        log.info("Running Azure Cloud Custodian Self-Host")

        resources.load_available()

        # Use the default session unless the storage id belongs to a
        # different subscription, in which case a dedicated session is
        # created for storage access.
        self.session = local_session(Session)
        self.storage_session = self.session
        storage_subscription_id = ResourceIdParser.get_subscription_id(storage_id)
        if storage_subscription_id != self.session.subscription_id:
            self.storage_session = Session(subscription_id=storage_subscription_id)

        # Load configuration
        self.options = Host.build_options(output_dir, log_group, metrics)
        self.policy_storage_uri = policy_uri
        self.event_queue_id = storage_id
        self.event_queue_name = queue_name

        # Default event queue name is the subscription ID
        if not self.event_queue_name:
            self.event_queue_name = self.session.subscription_id

        # Prepare storage bits
        self.policy_blob_client = None
        self.blob_cache = {}
        self.queue_storage_account = self.prepare_queue_storage(
            self.event_queue_id,
            self.event_queue_name)

        # NOTE(review): this reset happens after prepare_queue_storage();
        # if that call assigns self.queue_service it is overwritten here.
        self.queue_service = None

        # Register event subscription
        self.update_event_subscription()

        # Policy cache and dictionary
        self.policy_cache = tempfile.mkdtemp()
        self.policies = {}

        # Configure scheduler
        self.scheduler = BlockingScheduler(Host.get_scheduler_config())
        # Only surface scheduler messages at ERROR and above.
        logging.getLogger('apscheduler.executors.default').setLevel(logging.ERROR)
        logging.getLogger('apscheduler').setLevel(logging.ERROR)

        # Schedule recurring policy updates
        # next_run_time is set to "now" so the first update is not delayed
        # by a full interval (per APScheduler add_job semantics).
        self.scheduler.add_job(self.update_policies,
                               'interval',
                               seconds=policy_update_seconds,
                               id="update_policies",
                               next_run_time=datetime.now(),
                               executor='threadpool')

        # Schedule recurring queue polling
        self.scheduler.add_job(self.poll_queue,
                               'interval',
                               seconds=queue_poll_seconds,
                               id="poll_queue",
                               executor='threadpool')

        # NOTE(review): with APScheduler's BlockingScheduler, start() blocks
        # the calling thread, so this constructor presumably does not return
        # until the scheduler shuts down - confirm this is intended.
        self.scheduler.start()
def stream(repo_uri,
           stream_uri,
           verbose,
           assume,
           sort,
           before=None,
           after=None,
           policy_pattern=()):
    """Stream git history policy changes to destination.


    Default stream destination is a summary of the policy changes to stdout, one
    per line. Also supported for stdout streaming is `jsonline`.

    AWS Kinesis and SQS destinations are specified by providing the ARN.

    Database destinations are supported by providing a sqlalchemy DSN. Note
    SQLAlchemy and db drivers must be installed separately as they are an
    optional dependency.

    When using database destinations, streaming defaults to incremental.

    Returns the number of changes that were sent to the destination.
    """
    log_level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s",
        level=log_level)
    logging.getLogger('botocore').setLevel(logging.WARNING)

    # Normalize the optional time window bounds.
    before = parse(before) if before else before
    after = parse(after) if after else after
    if sort:
        sort = reduce(operator.or_, [SORT_TYPE[s] for s in sort])
    matcher = (
        partial(policy_path_matcher, patterns=policy_pattern)
        if policy_pattern else None)

    with contextlib.closing(TempDir().open()) as temp_dir:
        if repo_uri is None:
            repo_uri = pygit2.discover_repository(os.getcwd())
            log.debug("Using repository %s", repo_uri)

        # Remote repositories are cloned into the temporary directory,
        # anything else is opened in place.
        is_remote = repo_uri.startswith(('http', 'git@'))
        if is_remote:
            log.info("Cloning repository: %s", repo_uri)
            repo = pygit2.clone_repository(repo_uri, temp_dir.path)
        else:
            repo = pygit2.Repository(repo_uri)
        load_available()
        policy_repo = PolicyRepo(repo_uri, repo, matcher)
        change_count = 0

        with contextlib.closing(transport(stream_uri, assume)) as t:
            # Indexed transports can resume from their last position when
            # no explicit lower bound was given.
            if after is None and isinstance(t, IndexedTransport):
                after = t.last()
            deltas = policy_repo.delta_stream(after=after, before=before)
            for change_count, change in enumerate(deltas, 1):
                t.send(change)

        log.info("Streamed %d policy repo changes", change_count)
    return change_count
    def test_iam_permissions_validity(self):
        """Check resource, action and filter permissions against iam-actions.json.

        Raises ValueError when a resource's service (or permission prefix)
        is absent from the loaded data, or when check_permissions reports
        any invalid permission.
        """
        cfg = Config.empty()
        missing = set()
        invalid = []

        perms = load_data('iam-actions.json')
        resources.load_available()

        for rtype, rclass in manager.resources.items():
            policy = Bag({
                'name': 'permcheck',
                'resource': rtype,
                'provider_name': 'aws'
            })
            ctx = self.get_context(config=cfg, policy=policy)
            mgr = rclass(ctx, policy)

            # A resource is known if either its service or its optional
            # permission prefix appears in the permissions data.
            candidates = (
                mgr.resource_type.service,
                getattr(mgr.resource_type, 'permission_prefix', None))
            if not any(name in perms for name in candidates):
                missing.add("%s->%s" % (rtype, mgr.resource_type.service))
                continue

            invalid.extend(
                self.check_permissions(perms, mgr.get_permissions(), rtype))

            for action_name, action_cls in rclass.action_registry.items():
                policy['actions'] = [action_name]
                action_perms = action_cls({}, mgr).get_permissions()
                label = "{k}.actions.{n}".format(k=rtype, n=action_name)
                invalid.extend(
                    self.check_permissions(perms, action_perms, label))

            for filter_name, filter_cls in rclass.filter_registry.items():
                # Boolean/structural filters are skipped.
                if filter_name in ('or', 'and', 'not', 'missing'):
                    continue
                policy['filters'] = [filter_name]
                filter_perms = filter_cls({}, mgr).get_permissions()
                label = "{k}.filters.{n}".format(k=rtype, n=filter_name)
                invalid.extend(
                    self.check_permissions(perms, filter_perms, label))

        if missing:
            raise ValueError("resources missing service %s" %
                             ('\n'.join(sorted(missing))))

        if invalid:
            raise ValueError("invalid permissions \n %s" %
                             ('\n'.join(sorted(map(str, invalid)))))
Exemple #4
0
def report_account(account, region, policies_config, output_path, cache_path,
                   debug):
    """Collect report records for every policy of one account/region.

    Each record is annotated with the policy name, region, account name and
    any ``key:value`` account tags. Returns the combined list of records.
    """
    account_name = account['name']
    output_path = os.path.join(output_path, account_name, region)
    cache_file = "%s-%s.cache" % (account_name, region)
    cache_path = os.path.join(cache_path, cache_file)

    load_available()
    config = Config.empty(region=region,
                          output_dir=output_path,
                          account_id=account['account_id'],
                          metrics_enabled=False,
                          cache=cache_path,
                          log_group=None,
                          profile=None,
                          external_id=None)

    # Credentials: an assumed role takes precedence over a named profile.
    role = account.get('role')
    if role:
        config['assume_role'] = role
        config['external_id'] = account.get('external_id')
    elif account.get('profile'):
        config['profile'] = account['profile']

    records = []
    for policy in PolicyCollection.from_data(policies_config, config):
        # initialize policy execution context for output access
        policy.ctx.initialize()
        log.debug("Report policy:%s account:%s region:%s path:%s",
                  policy.name, account['name'], region, output_path)

        if p_output_is_s3(policy) if False else policy.ctx.output.type == "s3":
            # Look back one day from now for s3-stored output.
            begin_date = datetime.now() - timedelta(days=1)
            output_config = policy.ctx.output.config
            policy_records = record_set(
                policy.session_factory, output_config['netloc'],
                strip_output_path(output_config['path'], policy.name),
                begin_date)
        else:
            policy_records = fs_record_set(policy.ctx.log_dir, policy.name)

        for record in policy_records:
            record['policy'] = policy.name
            record['region'] = policy.options.region
            record['account'] = account['name']
            for tag in account.get('tags', ()):
                if ':' not in tag:
                    continue
                tag_key, tag_value = tag.split(':', 1)
                record[tag_key] = tag_value
        records.extend(policy_records)
    return records
def schema_completer(prefix):
    """ For tab-completion via argcomplete, return completion options.

    For the given prefix so far, return the possible options.  Note that
    filtering via startswith happens after this list is returned.

    The prefix is split on '.' into an optional provider, a resource, a
    category ('actions' or 'filters') and an item. When no provider is
    given, 'aws' is assumed. Returns a list of candidate strings, which is
    empty when nothing applies.
    """
    from c7n import schema
    load_available()
    components = prefix.split('.')

    if components[0] in provider.clouds.keys():
        cloud_provider = components.pop(0)
        provider_resources = provider.resources(cloud_provider)
        if not components:
            # The prefix was exactly a provider name (e.g. "aws"), so there
            # is no resource component left; indexing components[0] below
            # would raise IndexError. Offer no completions instead.
            return []
    else:
        cloud_provider = 'aws'
        provider_resources = provider.resources('aws')
        components[0] = "aws.%s" % components[0]

    # Completions for resource
    if len(components) == 1:
        choices = [
            r for r in provider.resources().keys()
            if r.startswith(components[0])
        ]
        # A unique match is also offered with a trailing dot so completion
        # can continue on to the category.
        if len(choices) == 1:
            choices += ['{}{}'.format(choices[0], '.')]
        return choices

    if components[0] not in provider_resources.keys():
        return []

    # Completions for category
    if len(components) == 2:
        choices = [
            '{}.{}'.format(components[0], x) for x in ('actions', 'filters')
            if x.startswith(components[1])
        ]
        if len(choices) == 1:
            choices += ['{}{}'.format(choices[0], '.')]
        return choices

    # Completions for item
    if len(components) == 3:
        resource_mapping = schema.resource_vocabulary(cloud_provider)
        return [
            '{}.{}.{}'.format(components[0], components[1], x)
            for x in resource_mapping[components[0]][components[1]]
        ]

    return []
Exemple #6
0
def init(config,
         use,
         debug,
         verbose,
         accounts,
         tags,
         policies,
         resource=None,
         policy_tags=()):
    """Configure logging, load and filter the account and policy configs.

    Returns a tuple of (accounts_config, custodian_config, executor) where
    executor is MainThreadExecutor when ``debug`` is truthy and
    ProcessPoolExecutor otherwise.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level, format="%(asctime)s: %(name)s:%(levelname)s %(message)s")

    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    # Quiet down chatty third-party and output loggers.
    for logger_name, logger_level in (
            ('botocore', logging.ERROR),
            ('s3transfer', logging.WARNING),
            ('custodian.s3', logging.ERROR),
            ('urllib3', logging.WARNING)):
        logging.getLogger(logger_name).setLevel(logger_level)

    accounts = comma_expand(accounts)
    policies = comma_expand(policies)
    tags = comma_expand(tags)
    policy_tags = comma_expand(policy_tags)

    # Filter out custodian log messages on console output if not
    # at warning level or higher, see LogFilter docs and #2674
    stream_handlers = [
        h for h in root_logger.handlers
        if isinstance(h, logging.StreamHandler)]
    for handler in stream_handlers:
        handler.addFilter(LogFilter())

    with open(config, 'rb') as fh:
        accounts_config = yaml.safe_load(fh.read())
        jsonschema.validate(accounts_config, CONFIG_SCHEMA)

    # The policy file is optional; default to an empty config.
    custodian_config = {}
    if use:
        with open(use) as fh:
            custodian_config = yaml.safe_load(fh.read())

    accounts_config['accounts'] = list(accounts_iterator(accounts_config))
    filter_policies(custodian_config, policy_tags, policies, resource)
    filter_accounts(accounts_config, tags, accounts)

    load_available()
    MainThreadExecutor.c7n_async = False
    executor = MainThreadExecutor if debug else ProcessPoolExecutor
    return accounts_config, custodian_config, executor
def diff(repo_uri, source, target, output, verbose):
    """Policy diff between two arbitrary revisions.

    Revision specifiers for source and target can use fancy git refspec syntax
    for symbolics, dates, etc.

    See: https://git-scm.com/book/en/v2/Git-Tools-Revision-Selection

    Default revision selection is dependent on current working tree
    branch. The intent is for two use cases, if on a non-master branch
    then show the diff to master.  If on master show the diff to
    previous commit on master. For repositories not using the
    `master` convention, please specify explicit source and target.
    """
    log_level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s",
        level=log_level)
    logging.getLogger('botocore').setLevel(logging.WARNING)

    if repo_uri is None:
        repo_uri = pygit2.discover_repository(os.getcwd())

    repo = pygit2.Repository(repo_uri)
    load_available()

    # On master: previous commit -> master.
    # Elsewhere: master -> current branch.
    branch = repo.head.shorthand
    on_master = branch == 'master'
    if source is None:
        source = 'HEAD^1' if on_master else 'master'
    if target is None:
        target = 'master' if on_master else branch

    policy_repo = PolicyRepo(repo_uri, repo)
    source_rev = repo.revparse_single(source)
    target_rev = repo.revparse_single(target)
    changes = list(policy_repo.delta_commits(source_rev, target_rev))

    # Removed policies are left out of the emitted document.
    remaining = [c.policy.data for c in changes if c.kind != ChangeType.REMOVE]
    document = yaml.safe_dump({'policies': remaining})
    output.write(document.encode('utf8'))
 def _get_resource_id(self, resource, policy):
     """
     Look up the id attribute name for the resource type of a policy.
     :param resource: dict keyed by policy name
     :param policy: the name of the policy
     :return: the resource type's ``id`` value, ``UNKNOWN_RESOURCE_ID``
         when the lookup fails, or ``None`` when the resource type is
         unknown
     :rtype: string
     """
     policy_entry = resource.get(policy, {})
     resource_type = policy_entry.get(self.RESOURCE_TYPE_KEY,
                                      self.UNKNOWN_RESOURCE_TYPE)
     if resource_type == self.UNKNOWN_RESOURCE_TYPE:
         return None

     load_available()
     fallback_id = self.UNKNOWN_RESOURCE_ID
     try:
         resource_class = get_resource_class(resource_type)
         return resource_class.resource_type().id
     except Exception:
         # Best effort: report the failure and hand back the placeholder.
         logger.warning('unable to get resource_id for %s - id: %s', policy,
                        fallback_id)
         return fallback_id
Exemple #9
0
def version_cmd(options):
    """Print the version, or a full environment report when options.debug is set."""
    from c7n.version import version
    from c7n.resources import load_available
    from c7n.mu import generate_requirements

    if not options.debug:
        print(version)
        return

    # Continuation lines of the multi-line Python version string are
    # padded so they line up under the first line.
    continuation = '\n' + ' ' * 13

    print(
        "\nPlease copy/paste the following info along with any bug reports:\n")
    print("Custodian:  ", version)
    print("Python:     ", sys.version.replace('\n', continuation))
    # os.uname is only available on recent versions of Unix
    try:
        print("Platform:   ", os.uname())
    except Exception:  # pragma: no cover
        print("Platform:  ", sys.platform)

    in_virtualenv = (
        hasattr(sys, 'real_prefix') or
        getattr(sys, 'base_prefix', sys.prefix) != sys.prefix)
    print("Using venv: ", in_virtualenv)
    print("Docker: %s" % str(bool(os.path.exists('/.dockerenv'))))
    print("Installed: \n")

    # Core package plus one package per provider that was found.
    packages = ['c7n']
    found = load_available(resources=False)
    for provider_name, package_name in (
            ('gcp', 'c7n_gcp'),
            ('azure', 'c7n_azure'),
            ('k8s', 'c7n_kube'),
            ('openstack', 'c7n_openstack')):
        if provider_name in found:
            packages.append(package_name)
    print(generate_requirements(packages))
Exemple #10
0
def schema_cmd(options):
    """ Print info about the resources, actions and filters available.

    Reads ``options.json``, ``options.summary`` and ``options.resource``.
    The resource selector has the form [PROVIDER].RESOURCE.CATEGORY.ITEM
    (or ``mode`` / ``mode.TYPE``). Output is printed to stdout; invalid
    selectors are logged as errors and end the process via ``sys.exit(1)``.
    """
    from c7n import schema
    if options.json:
        schema.json_dump(options.resource)
        return

    if options.summary:
        load_available()
        resource_mapping = schema.resource_vocabulary()
        schema.pprint_schema_summary(resource_mapping)
        return

    # Here are the formats for what we accept:
    # - No argument
    #   - List all available RESOURCES
    # - PROVIDER
    #   - List all available RESOURCES for supplied PROVIDER
    # - RESOURCE
    #   - List all available actions and filters for supplied RESOURCE
    # - MODE
    #   - List all available MODES
    # - RESOURCE.actions
    #   - List all available actions for supplied RESOURCE
    # - RESOURCE.actions.ACTION
    #   - Show class doc string and schema for supplied action
    # - RESOURCE.filters
    #   - List all available filters for supplied RESOURCE
    # - RESOURCE.filters.FILTER
    #   - Show class doc string and schema for supplied filter

    if not options.resource:
        load_available(resources=False)
        resource_list = {'resources': sorted(itertools.chain(
            *[clouds[p].resource_map.keys() for p in PROVIDER_NAMES]))}
        print(yaml_dump(resource_list))
        return

    # Format is [PROVIDER].RESOURCE.CATEGORY.ITEM
    # optional provider defaults to aws for compatibility
    components = options.resource.lower().split('.')

    if len(components) == 1 and components[0] in PROVIDER_NAMES:
        # Pass a real one-element tuple: `(x)` is only a parenthesized
        # string, which would be treated as a sequence of characters.
        load_providers((components[0],))
        resource_list = {'resources': sorted(
            clouds[components[0]].resource_map.keys())}
        print(yaml_dump(resource_list))
        return
    if components[0] in PROVIDER_NAMES:
        cloud_provider = components.pop(0)
        components[0] = '%s.%s' % (cloud_provider, components[0])
        load_resources((components[0],))
        resource_mapping = schema.resource_vocabulary(
            cloud_provider)
    elif components[0] == 'mode':
        load_available(resources=False)
        resource_mapping = schema.resource_vocabulary()
    else:  # compatibility, aws is default for provider
        components[0] = 'aws.%s' % components[0]
        load_resources((components[0],))
        resource_mapping = schema.resource_vocabulary('aws')

    #
    # Handle mode
    #
    if components[0] == "mode":
        if len(components) == 1:
            output = {components[0]: list(resource_mapping[components[0]].keys())}
            print(yaml_dump(output))
            return

        if len(components) == 2:
            if components[1] not in resource_mapping[components[0]]:
                log.error('{} is not a valid mode'.format(components[1]))
                sys.exit(1)

            _print_cls_schema(resource_mapping[components[0]][components[1]])
            return

        # We received too much (e.g. mode.actions.foo)
        log.error("Invalid selector '{}'. Valid options are 'mode' "
                  "or 'mode.TYPE'".format(options.resource))
        sys.exit(1)
    #
    # Handle resource
    #
    resource = components[0]
    if resource not in resource_mapping:
        log.error('{} is not a valid resource'.format(resource))
        sys.exit(1)

    if len(components) == 1:
        docstring = ElementSchema.doc(
            resource_mapping[resource]['classes']['resource'])
        # The class objects are dropped before the mapping is dumped.
        del resource_mapping[resource]['classes']
        if docstring:
            print("\nHelp\n----\n")
            print(docstring + '\n')
        output = {resource: resource_mapping[resource]}
        print(yaml_dump(output))
        return

    #
    # Handle category
    #
    category = components[1]
    if category not in ('actions', 'filters'):
        log.error("Valid choices are 'actions' and 'filters'. You supplied '{}'".format(category))
        sys.exit(1)

    if len(components) == 2:
        output = "No {} available for resource {}.".format(category, resource)
        if category in resource_mapping[resource]:
            output = {resource: {
                category: resource_mapping[resource][category]}}
        print(yaml_dump(output))
        return

    #
    # Handle item
    #
    item = components[2]
    if item not in resource_mapping[resource][category]:
        log.error('{} is not in the {} list for resource {}'.format(item, category, resource))
        sys.exit(1)

    if len(components) == 3:
        cls = resource_mapping[resource]['classes'][category][item]
        _print_cls_schema(cls)

        return

    # We received too much (e.g. s3.actions.foo.bar)
    log.error("Invalid selector '{}'.  Max of 3 components in the "
              "format RESOURCE.CATEGORY.ITEM".format(options.resource))
    sys.exit(1)
Exemple #11
0
def run_account(account, region, policies_config, output_path,
                cache_period, cache_path, metrics, dryrun, debug):
    """Execute a set of policies on an account.

    Returns a tuple ``(policy_counts, success)``: a dict mapping policy name
    to the number of resources returned by its run, and a flag that is False
    once any policy raised. An AccessDenied client error ends the run early.
    """
    logging.getLogger('custodian.output').setLevel(logging.ERROR + 1)
    CONN_CACHE.session = None
    CONN_CACHE.time = None
    load_available()

    # allow users to specify interpolated output paths
    if '{' not in output_path:
        output_path = os.path.join(output_path, account['name'], region)

    cache_file = "%s-%s.cache" % (account['account_id'], region)
    cache_path = os.path.join(cache_path, cache_file)

    config = Config.empty(
        region=region, cache=cache_path,
        cache_period=cache_period, dryrun=dryrun, output_dir=output_path,
        account_id=account['account_id'], metrics_enabled=metrics,
        log_group=None, profile=None, external_id=None)

    env_vars = account_tags(account)

    role = account.get('role')
    if role:
        if isinstance(role, six.string_types):
            config['assume_role'] = role
            config['external_id'] = account.get('external_id')
        else:
            # Non-string role: resolve credentials up front and pass them
            # through the environment.
            session = get_session(account, 'custodian', region)
            env_vars.update(_get_env_creds(session, region))
    elif account.get('profile'):
        config['profile'] = account['profile']

    policies = PolicyCollection.from_data(policies_config, config)
    policy_counts = {}
    success = True
    st = time.time()

    with environ(**env_vars):
        for p in policies:
            # Variable expansion and non schema validation (not optional)
            p.expand_variables(p.get_variables(account.get('vars', {})))
            p.validate()

            if p.region and p.region != region:
                continue

            log.debug(
                "Running policy:%s account:%s region:%s",
                p.name, account['name'], region)
            try:
                resources = p.run()
                matched = len(resources) if resources else 0
                policy_counts[p.name] = matched
                if not resources:
                    continue
                if not config.dryrun and p.execution_mode != 'pull':
                    log.info("Ran account:%s region:%s policy:%s provisioned time:%0.2f",
                             account['name'], region, p.name, time.time() - st)
                    continue
                log.info(
                    "Ran account:%s region:%s policy:%s matched:%d time:%0.2f",
                    account['name'], region, p.name, matched,
                    time.time() - st)
            except ClientError as e:
                success = False
                if e.response['Error']['Code'] == 'AccessDenied':
                    # No point trying further policies on this account.
                    log.warning('Access denied api:%s policy:%s account:%s region:%s',
                                e.operation_name, p.name, account['name'], region)
                    return policy_counts, success
                log.error(
                    "Exception running policy:%s account:%s region:%s error:%s",
                    p.name, account['name'], region, e)
                continue
            except Exception as e:
                success = False
                log.error(
                    "Exception running policy:%s account:%s region:%s error:%s",
                    p.name, account['name'], region, e)
                if not debug:
                    continue
                # Debug mode: show the traceback, drop into the debugger,
                # then propagate the error.
                import traceback
                import pdb
                import sys
                traceback.print_exc()
                pdb.post_mortem(sys.exc_info()[-1])
                raise

    return policy_counts, success
Exemple #12
0
 def setUp(self):
     """Load all available resources; the linting meta tests need them."""
     # we need to load all resources for the linting meta tests.
     load_available()
Exemple #13
0
def json_dump(resource=None):
    """Print the schema generated for ``resource`` as indented JSON."""
    load_available()
    schema_doc = generate(resource)
    rendered = json.dumps(schema_doc, indent=2)
    print(rendered)
Exemple #14
0
def main(path, output, since, end, user):
    """Group commit messages by category and write them to ``output``.

    Walks the repository at ``path`` from HEAD, optionally bounded by the
    ``since`` / ``end`` date references and restricted to ``user`` authors.
    The category is the text before the first '-' of the commit message.
    A schema diff section is appended when ``since`` alone is given and
    contains more than two dots.
    """
    repo = pygit2.Repository(path)
    if since:
        since_dateref = resolve_dateref(since, repo)
    if end:
        end_dateref = resolve_dateref(end, repo)

    groups = {}
    count = 0
    for commit in repo.walk(repo.head.target):
        cdate = commit_date(commit)
        if since and cdate <= since_dateref:
            break
        if end and cdate >= end_dateref:
            continue
        if user and commit.author.name not in user:
            continue

        stripped = commit.message.strip()
        prefix, dash, _ = stripped.partition('-')
        if dash:
            category = prefix
        else:
            print("bad commit %s %s" % (cdate, commit.message))
            category = 'other'

        # Normalize: lower-case, then keep only the leading segment before
        # any '.' or '/'.
        category = category.strip().lower()
        category = category.split('.', 1)[0]
        category = category.split('/', 1)[0]
        if category in aliases:
            category = aliases[category]

        # Only the first line of the message is reported.
        message = stripped.split('\n')[0]

        if any(category.startswith(s) for s in skip):
            continue
        if user:
            message = "%s - %s - %s" % (cdate.strftime("%Y/%m/%d"),
                                        commit.author.name, message)
        groups.setdefault(category, []).append(message)
        count += 1

    import pprint
    print('total commits %d' % count)
    pprint.pprint({k: len(groups[k]) for k in groups})

    diff_md = ""
    if since and not end and since.count('.') > 2:
        schema_old = schema_outline_from_docker(since)
        load_available()
        schema_new = resource_outline()
        diff_md = schema_diff(schema_old, schema_new)

    with open(output, 'w') as fh:
        for category in sorted(groups):
            if category in skip:
                continue
            print("# %s" % category, file=fh)
            for entry in sorted(groups[category]):
                print(" - %s" % entry.strip(), file=fh)
            print("\n", file=fh)
        if diff_md.strip():
            print("# schema changes", file=fh)
            print(diff_md, file=fh)