def _require_account_id(args: Dict) -> str:
  path = _get_with_parent('path', args)
  if path is None:
    raise GFInternal(f'Missing path in {args}')
  path_parts = path.split('$')
  if len(path_parts) < 2:
    raise GFInternal(f'Invalid path {path}')
  return path_parts[1]

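# Hedged example (not part of the original source): assuming _get_with_parent
# reads 'path' straight from the args dict and that import paths are
# '$'-joined with the account id in the second slot, a call looks like this
# (all values here are hypothetical placeholders):
def _example_require_account_id() -> None:
  assert _require_account_id({'path': 'import$123456789012$us-east-1'
                              }) == '123456789012'
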
def _config_uri_fn(resource_name: str, **kwargs) -> str:
  if resource_name == 'ConfigurationRecorder':
    name = kwargs['name']
    region = _get_with_parent('region', kwargs)
    if region is None:
      raise GFInternal(f'Missing region in {kwargs}')
    account_id = _require_account_id(kwargs)
    return f'configurationRecorders/{account_id}/{region}/{name}'
  raise GFInternal(f'Failed config uri fn {resource_name} {kwargs}')

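# Hedged example (hypothetical values; assumes _get_with_parent and
# _require_account_id resolve 'region' and the path-embedded account id as
# sketched above). Configuration recorders get a synthetic URI, not an ARN:
def _example_config_uri() -> None:
  uri = _config_uri_fn('ConfigurationRecorder',
                       name='default',
                       region='us-east-1',
                       path='import$123456789012$us-east-1')
  assert uri == 'configurationRecorders/123456789012/us-east-1/default'
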
def _redshift_uri_fn(partition: str, account_id: str, resource_name: str,
                     **kwargs) -> str:
  id = kwargs.get('id')
  if id is None:
    raise GFInternal(f'Missing id in {kwargs}')
  region = _get_with_parent('region', kwargs)
  if region is None:
    raise GFInternal(f'Missing region in {kwargs}')
  if resource_name == 'cluster':
    return f'arn:{partition}:redshift:{region}:{account_id}:cluster:{id}'
  raise GFInternal(f'Failed redshift uri fn {resource_name} {kwargs}')

def arn_fn(service: str, partition: str, account_id: str, **kwargs) -> Uri:
  if 'uri' in kwargs:
    return kwargs['uri']
  resource_name = kwargs.pop('resource_name')
  # Allow overriding account_id when targeting across accounts
  account_id = kwargs.get('account_id', account_id)
  if service == 'ec2':
    return _ec2_arn_fn(resource_name, account_id, partition, **kwargs)
  elif service == 's3':
    return _s3_bucket_arn_fn(kwargs['name'], partition)
  elif service == 'elb':
    return _elb_arn_fn(resource_name, partition, account_id, **kwargs)
  elif service == 'iam':
    return _iam_uri_fn(resource_name, partition, account_id, **kwargs)
  elif service == 'logs':
    return _logs_uri_fn(partition, account_id, resource_name, **kwargs)
  elif service == 'config':
    return _config_uri_fn(resource_name, **kwargs)
  elif service == 'apigateway':
    return _apigateway_uri_fn(partition, account_id, resource_name, **kwargs)
  elif service == 'apigatewayv2':
    return _apigatewayv2_uri_fn(partition, account_id, resource_name, **kwargs)
  elif service == 'redshift':
    return _redshift_uri_fn(partition, account_id, resource_name, **kwargs)
  elif service == 'cloudwatch' and resource_name == 'metric':
    return _cloudwatch_metrics(**kwargs)
  elif service == 'elasticbeanstalk' and resource_name == 'applicationversion':
    return _elastic_beanstalk(partition, account_id, resource_name, **kwargs)
  # elif service == 'cloudformation':
  #   return _cloudformation_uri_fn(partition, account_id, resource_name,
  #                                 **kwargs)
  id = kwargs.get('id')
  if id is None:
    raise GFInternal(f'Missing id in {kwargs}')
  region = _get_with_parent('region', kwargs)
  if region is None:
    if service in ('route53', ):
      region = ''
    else:
      raise GFInternal(f'Missing region in {kwargs} for service {service}')
  if service == 'autoscaling' and resource_name == 'launchConfiguration':
    # Implicit string concatenation: this is one ARN string, not a tuple
    return (
        f'arn:{partition}:{service}:{region}:{account_id}:launchConfiguration:'
        f'launchConfigurationName/{id}')
  if service in ('autoscaling', 'kms', 'route53', 'ssm', 'eks',
                 'elasticbeanstalk'):
    return f'arn:{partition}:{service}:{region}:{account_id}:{resource_name}/{id}'
  # TODO: remove .lower() call. make resource_name required, then verify it?
  return f'arn:{partition}:{service}:{region}:{account_id}:{resource_name.lower()}:{id}'

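# Hedged examples (not in the original source; values are placeholders and
# assume _get_with_parent reads 'region' directly from kwargs). The '/'
# branch matches real ARN shapes for services like eks; the final fallback
# joins with ':' and lowercases the resource name, which happens to match
# e.g. RDS ('arn:aws:rds:<region>:<account>:db:<id>'):
def _example_arn_fn() -> None:
  assert arn_fn('eks', 'aws', '123456789012', resource_name='cluster',
                id='prod', region='us-east-1') == \
      'arn:aws:eks:us-east-1:123456789012:cluster/prod'
  assert arn_fn('rds', 'aws', '123456789012', resource_name='db',
                id='mydb', region='us-east-1') == \
      'arn:aws:rds:us-east-1:123456789012:db:mydb'
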
def _import_task(proxy: ServiceProxy, cluster_arn: str, task_arn: str):
  tasks_resp = proxy.list('describe_tasks',
                          cluster=cluster_arn,
                          tasks=[task_arn],
                          include=['TAGS'])
  if tasks_resp is None:
    raise GFInternal(f'Failed to fetch ecs task {task_arn}')
  task_list = tasks_resp[1].get('tasks', [])
  if len(task_list) != 1:
    raise GFInternal(f'Wrong number of tasks for {task_arn} {tasks_resp}')
  task = task_list[0]
  return task

def _import_service(proxy: ServiceProxy, cluster_arn: str, service_arn: str):
  services_resp = proxy.list('describe_services',
                             cluster=cluster_arn,
                             services=[service_arn],
                             include=['TAGS'])
  if services_resp is None:
    raise GFInternal(f'Failed to fetch ecs service {service_arn}')
  service_list = services_resp[1].get('services', [])
  if len(service_list) != 1:
    raise GFInternal(
        f'Wrong number of services for {service_arn} {services_resp}')
  service = service_list[0]
  return service

def _import_cluster(proxy: ServiceProxy, cluster_arn: str) -> Dict[str, Any]:
  clusters_resp = proxy.list(
      'describe_clusters',
      clusters=[cluster_arn],
      include=["ATTACHMENTS", "SETTINGS", "STATISTICS", "TAGS"])
  if clusters_resp is None:
    raise GFInternal(f'Failed to fetch ecs cluster {cluster_arn}')
  cluster_list = clusters_resp[1].get('clusters', [])
  if len(cluster_list) != 1:
    raise GFInternal(
        f'Wrong number of clusters for {cluster_arn} {clusters_resp}')
  cluster = cluster_list[0]
  return cluster

def _map_resources_v1(
    self,
    raw_list: List[Any],
    ctx: Context,
    service: str,
    resource_name: str,
    raw_uri_fn: Callable,
    parent_args: Optional[Dict] = None) -> Iterator[MapResult]:
  # TODO: validate?
  transform = self._find_transform(service, resource_name)
  if transform.version != 1:
    raise GFInternal(
        f'v1 mapper called for {resource_name} with version {transform.version}'
    )
  parent_kwargs = {} if parent_args is None else parent_args
  for spec in transform.resources:
    yield from self._map_spec_v1(transform.service, spec, raw_list, ctx,
                                 resource_name, raw_uri_fn, parent_kwargs)
  for subspec in transform.subresources:
    for parent in raw_list:
      subresources = parent.get(subspec.key, [])
      parent_params = {
          k: self.value_from_spec(v, parent, parent=parent_args)
          for k, v in subspec.parent.items()
      }
      yield from self._map_resources_v1(
          subresources,
          ctx,
          service,
          subspec.typ,
          partial(raw_uri_fn, resource_name=subspec.typ),
          parent_args=parent_params)

def _apigateway_uri_fn(partition: str, account_id: str, resource_name: str,
                       **kwargs) -> str:
  id = kwargs.get('id')
  if id is None:
    raise GFInternal(f'Missing id in {kwargs}')
  region = _get_with_parent('region', kwargs)
  if region is None:
    raise GFInternal(f'Missing region in {kwargs}')
  if resource_name == 'RestApi':
    return f'arn:{partition}:execute-api:{region}:{account_id}:{id}'
  elif resource_name == 'Stage':
    api_id = _get_with_parent('apiId', kwargs)
    if api_id is None:
      raise GFInternal(f'Missing ApiId in {kwargs}')
    return f'arn:{partition}:execute-api:{region}:{account_id}:{api_id}/{id}'
  raise GFInternal(f'Failed apigateway uri fn {resource_name} {kwargs}')

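# Hedged example (hypothetical values; assumes _get_with_parent can resolve
# 'region' and 'apiId' from kwargs). Stages nest under their API id:
def _example_apigateway_uri() -> None:
  assert _apigateway_uri_fn('aws', '123456789012', 'Stage',
                            id='prod', apiId='a1b2c3d4',
                            region='us-east-1') == \
      'arn:aws:execute-api:us-east-1:123456789012:a1b2c3d4/prod'
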
def _elastic_beanstalk(partition: str, account_id: str, resource_name: str,
                       **kwargs) -> str:
  if resource_name == 'applicationversion':
    version_label = kwargs['id']
    application_name = kwargs['application_name']
    region = _get_with_parent('region', kwargs)
    return f'arn:{partition}:elasticbeanstalk:{region}:{account_id}:applicationversion/{application_name}/{version_label}'
  raise GFInternal(f'Failed elasticbeanstalk uri fn {resource_name} {kwargs}')

def _import_cluster(proxy: ServiceProxy, cluster_name: str) -> Dict[str, Any]:
  cluster_resp = proxy.get('describe_cluster', name=cluster_name)
  if cluster_resp is None:
    raise GFInternal(f'Failed to fetch eks cluster {cluster_name}')
  cluster = cluster_resp.get('cluster')
  # Tags included already
  # tags_resp = proxy.list('list_tags_for_resource', resourceArn=cluster['arn'])
  # if tags_resp is not None:
  #   cluster['Tags'] = tags_resp[1]['tags']
  return cluster

def _logs_uri_fn(partition: str, account_id: str, resource_name: str,
                 **kwargs) -> str:
  if resource_name == 'metric-filter':
    arn = _get_with_parent('arn', kwargs)
    name = kwargs['filter_name']
    return f'{arn}:{resource_name}:{name}'
  elif resource_name == 'log-group':
    region = _get_with_parent('region', kwargs)
    id = kwargs['log_group_id']
    return f'arn:{partition}:logs:{region}:{account_id}:{resource_name.lower()}:{id}:*'
  raise GFInternal(f'Failed logs uri fn {resource_name} {kwargs}')

def _elb_arn_fn(resource_name: str, partition: str, account_id: str,
                **kwargs) -> str:
  region = kwargs['context']['region']
  if resource_name == 'loadbalancer':
    name = kwargs['name']
    return f'arn:{partition}:elasticloadbalancing:' \
        f'{region}:{account_id}:loadbalancer/{name}'
  elif resource_name == 'Listener':
    return _listener_arn_fn(kwargs['loadbalancer_name'], kwargs['listener_id'],
                            account_id, region, partition)
  raise GFInternal(f'Failed ELB ARN {resource_name} {kwargs}')

def _cloudwatch_metrics(**kwargs) -> str:
  region = _get_with_parent('region', kwargs)
  if region is None:
    raise GFInternal(f'Missing region in {kwargs} for metric')
  account_id = _require_account_id(kwargs)
  name = kwargs['metric_name']
  namespace = kwargs['metric_namespace']
  dimensions = kwargs['metric_dimensions'] or []
  flattened = '$'.join([
      f'{dim["Name"]}_{dim["Value"]}' for dim in dimensions
      if dim['Value'] is not None
  ])
  return f'metrics/{account_id}/{region}/{namespace}/{name}/{flattened}'

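# Hedged example (placeholder values; assumes _get_with_parent and
# _require_account_id behave as sketched earlier). Metrics have no ARN, so a
# synthetic URI is built, with dimensions flattened into a '$'-joined tail:
def _example_cloudwatch_metric_uri() -> None:
  uri = _cloudwatch_metrics(region='us-east-1',
                            path='import$123456789012',
                            metric_name='CPUUtilization',
                            metric_namespace='AWS/EC2',
                            metric_dimensions=[{
                                'Name': 'InstanceId',
                                'Value': 'i-0abc123'
                            }])
  assert uri == ('metrics/123456789012/us-east-1/AWS/EC2/CPUUtilization/'
                 'InstanceId_i-0abc123')
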
def _ec2_arn_fn(resource_name: str, account: str, partition: str,
                **kwargs) -> Uri:
  # Handle security groups referenced by name rather than id
  if resource_name == 'security-group' and 'id' not in kwargs:
    return _security_group_by_name(partition, **kwargs)
  id = kwargs['id']
  region = _get_with_parent('region', kwargs)
  if region is None:
    zone = _get_with_parent('zone', kwargs)
    if zone is not None:
      region = _zone_to_region(zone)
  if region is None:
    raise GFInternal(f'Missing region in {kwargs}')
  return f'arn:{partition}:ec2:{region}:{account}:{resource_name.lower()}/{id}'

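# Hedged example (hypothetical values; assumes _zone_to_region strips the
# trailing zone letter, e.g. 'us-east-1a' -> 'us-east-1'). Resources that
# only report an availability zone still get a regional ARN:
def _example_ec2_arn_from_zone() -> None:
  assert _ec2_arn_fn('volume', '123456789012', 'aws',
                     id='vol-0abc123', zone='us-east-1a') == \
      'arn:aws:ec2:us-east-1:123456789012:volume/vol-0abc123'
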
def _iam_uri_fn(resource_name: str, partition: str, account_id: str,
                **kwargs) -> str:
  if resource_name == 'policy-version':
    return f'{kwargs["policy_arn"]}:{kwargs["version_id"]}'
  elif resource_name in ('RolePolicy', 'UserPolicy', 'GroupPolicy'):
    return f'{kwargs["arn"]}:{kwargs["policy_name"]}'
  elif resource_name == 'PasswordPolicy':
    return f'{account_id}/PasswordPolicy'
  elif resource_name == 'instance-profile':
    return f'arn:{partition}:iam::{account_id}:{resource_name}/{kwargs["id"]}'
  elif resource_name == 'signing-certificate':
    return f'arn:{partition}:iam::{account_id}:{resource_name}/{kwargs["certificate_id"]}'
  raise GFInternal(f'Failed IAM ARN ({resource_name}) {kwargs}')

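# Hedged examples (placeholder values, not from the original source). IAM is
# global, so the region slot stays empty ('iam::'); policy versions append
# the version id to the policy ARN:
def _example_iam_uris() -> None:
  assert _iam_uri_fn('policy-version', 'aws', '123456789012',
                     policy_arn='arn:aws:iam::123456789012:policy/MyPolicy',
                     version_id='v2') == \
      'arn:aws:iam::123456789012:policy/MyPolicy:v2'
  assert _iam_uri_fn('instance-profile', 'aws', '123456789012',
                     id='MyProfile') == \
      'arn:aws:iam::123456789012:instance-profile/MyProfile'
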
def _fetch_inline_policies(proxy: ServiceProxy, principal: str, name: str):
  kwargs = {f'{principal.capitalize()}Name': name}
  op = f'list_{principal}_policies'
  policies = _unpack(proxy.list(op, **kwargs))
  policy_op = f'get_{principal}_policy'
  results = []
  for policy_name in policies.get('PolicyNames', []):
    result = proxy.get(policy_op, PolicyName=policy_name, **kwargs)
    if result is None:
      raise GFInternal(
          f'Missing inline policy {policy_name} for {principal} {name}')
    results.append({
        'PolicyName': result['PolicyName'],
        'PolicyDocument': result['PolicyDocument']
    })
  return results

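# A minimal sketch of the naming convention relied on above: the boto3 IAM
# operation names are derived from the principal kind, so 'role' yields the
# real client methods list_role_policies / get_role_policy ('MyRole' is a
# placeholder name):
def _example_inline_policy_ops() -> None:
  principal, name = 'role', 'MyRole'
  assert {f'{principal.capitalize()}Name': name} == {'RoleName': 'MyRole'}
  assert f'list_{principal}_policies' == 'list_role_policies'
  assert f'get_{principal}_policy' == 'get_role_policy'
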
def import_to_db(db: Session, import_job_id: int, service_spec: ServiceSpec):
  # Note: self.name and resource_fns are presumably bound in an enclosing
  # scope; this function reads like a closure nested inside a service class.
  job = db.query(ImportJob).get(import_job_id)
  if job is None:
    raise GFInternal('Lost ImportJob')
  writer = db_import_writer(db,
                            job.id,
                            job.provider_account_id,
                            self.name,
                            phase=0,
                            source='base')
  for path, account in account_paths_for_import(db, job):
    boto = load_boto_session(account)
    proxy = Proxy.build(boto)
    ps = PathStack.from_import_job(job)
    service_proxy = proxy.service(self.name)
    for fn in resource_fns:
      fn(service_proxy, writer, account.scope, ps, service_spec)

def delete_acct(account_spec: str, dry_run: bool, force: bool):
  db = import_session()
  account = db.query(ProviderAccount).filter(
      ProviderAccount.provider == 'aws',
      ProviderAccount.name == account_spec).one_or_none()
  if account is None:
    raise GFInternal(f'Could not find AWS account {account_spec}')
  remove = force or query_yes_no(
      f'Remove AWS account {account.name} from GoldFig?', default='no')
  if remove:
    report = delete_account(db, account)
    print(f'Removed from AWS account {account.name}')
    for table, count in report.items():
      print(f'{table.ljust(36)}{str(count).rjust(6)} items')
    if not dry_run:
      db.commit()
  else:
    print('Aborting')
    db.rollback()

def import_region_to_db(db: Session, import_job_id: int, region: str,
                        service_spec: ServiceSpec):
  # Note: svc_name and fn are presumably bound in an enclosing scope; this
  # function reads like a closure specialized per service.
  job = db.query(ImportJob).get(import_job_id)
  if job is None:
    raise GFInternal('Lost ImportJob')
  writer = db_import_writer(db,
                            job.id,
                            job.provider_account_id,
                            svc_name,
                            phase=0,
                            source='base')
  for path, account in account_paths_for_import(db, job):
    boto = load_boto_session(account)
    proxy = Proxy.build(boto)
    ps = PathStack.from_import_job(job).scope(account.scope)
    service_proxy = proxy.service(svc_name, region)
    ps = ps.scope(region)
    for resource_name, raw_resources in fn(service_proxy, region,
                                           service_spec):
      writer(ps, resource_name, raw_resources, {'region': region})

def apply_mapped_attrs(db: Session, import_job: ImportJob, path: str,
                       mapped: MappedResource, attrs: List[MappedAttribute],
                       source: str, raw_import_id: Optional[int]):
  resource = Resource(provider_account_id=import_job.provider_account_id,
                      name=mapped.name,
                      path=path,
                      category=mapped.category,
                      provider_type=mapped.provider_type,
                      uri=mapped.uri,
                      service=mapped.service)
  # TODO: possibly a big perf hit?
  # Consider using a different API
  if resource.uri is None:
    raise GFInternal(f'Missing uri {mapped}')
  db.merge(
      MappedURI(uri=resource.uri,
                source=source,
                import_job_id=import_job.id,
                provider_account_id=import_job.provider_account_id,
                raw_import_id=raw_import_id))
  apply_resource(db, import_job, source, resource, mapped.raw, attrs)

def _map_relations_v1(
    self,
    path: str,
    raw_list,
    ctx: Context,
    service: str,
    resource_name: str,
    raw_uri_fn: UriFn,
    parent_args: Optional[Dict] = None
) -> Iterator[Tuple[Uri, str, Uri, List[Any]]]:
  # TODO: validate?
  transform = self._find_transform(service, resource_name)
  version = transform.spec.get('version', 0)
  if version != 1:
    raise GFInternal(
        f'v1 mapper called for {resource_name} with version {version}')
  resource_specs = transform.spec.get('resources', [])
  for spec in resource_specs:
    yield from self._map_relation_spec_v1(spec, path, raw_list, ctx,
                                          resource_name, raw_uri_fn,
                                          parent_args)
  subspecs = transform.spec.get('subresources', [])
  for spec in subspecs:
    subresource_key = spec.get('key')
    subresource_name = spec.get('type')
    parent_params_spec = spec.get('parent', {})
    for parent in raw_list:
      subresources = parent.get(subresource_key, [])
      parent_params = {
          k: self.value_from_spec(v, parent, parent=parent_args)
          for k, v in parent_params_spec.items()
      }
      yield from self.map_relations(
          path,
          subresources,
          ctx,
          service,
          subresource_name,
          partial(raw_uri_fn, resource_name=subresource_name),
          parent_args=parent_params)

def _map_partial_v1(self, partial_spec: PartialSpec, raw_list: List[Any],
                    ctx: Context, service: str, raw_uri_fn: UriFn,
                    resource_name: str) -> Iterator[Partial]:
  provider_attr_spec = partial_spec.attributes.get('provider', [])
  custom_attr_spec = partial_spec.attributes.get('custom', {})
  uri_fn: UriFn = partial(raw_uri_fn, resource_name=resource_name)
  for raw in raw_list:
    uri_args = {
        key: _find_path(path, raw)
        for key, path in partial_spec.uri.items()
    }
    target_uri: Uri = uri_fn(**uri_args, service=service, context=ctx)
    if target_uri is None:
      raise GFInternal(
          f'Failed to produce target uri {uri_args} {resource_name} {service} {ctx}'
      )
    provider_attrs = self._map_provider_attrs(provider_attr_spec, raw)
    custom_attrs = self._map_custom_attrs(custom_attr_spec, None, raw, ctx)
    yield Partial(target_uri=target_uri,
                  raw=raw,
                  attrs=provider_attrs + custom_attrs)

def uri_for_parent(self, uri: str) -> str:
  # Only called for division, so no regions, just paths
  parts = uri.split(':')
  path = ':'.join(parts[5:])
  org_segments = path.split('/')
  node_type = org_segments[0]
  if node_type == 'root':
    # This was the root, and it's in the organization
    return f'arn:{self._partition}:organizations::{self._master_account_id}:organization/{self._org_id}'
  elif node_type == 'account':
    account_id = org_segments[2]
    tail = self._path_to_account(account_id)
  else:
    ou_id = org_segments[2]
    tail = self._path_to_ou(ou_id)
  if tail.startswith('r-'):
    # This is contained directly in the root
    return f'arn:{self._partition}:organizations::{self._master_account_id}:root/{self._org_id}/{tail}'
  elif tail.startswith('ou-'):
    # This is contained in an organizational unit
    return f'arn:{self._partition}:organizations::{self._master_account_id}:ou/{self._org_id}/{tail}'
  else:
    raise GFInternal(f'Unknown AWS graph node {tail}')

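# Hedged walkthrough (hypothetical ids, not from the original source): for
# uri = 'arn:aws:organizations::123456789012:account/o-abc123/210987654321',
# parts[5:] joins back to 'account/o-abc123/210987654321', so node_type is
# 'account' and org_segments[2] is the member account id '210987654321'.
# _path_to_account then yields the last segment of its containing path, e.g.
# 'r-ab12' (directly under the root) or 'ou-ab12-cdefgh34' (under an OU),
# which selects the root/ or ou/ parent ARN above.
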
def _map_provider_attrs(self, attr_names: List[Union[str, Dict]],
                        raw) -> List[MappedAttribute]:
  attrs: List[MappedAttribute] = []
  for attr_name in attr_names:
    if isinstance(attr_name, str):
      value = self.value_from_spec({'path': attr_name}, raw)
      if value is not None:
        attrs.append(
            MappedAttribute(type='provider', name=attr_name, value=value))
    elif isinstance(attr_name, dict):
      for path_segment, keys in attr_name.items():
        for key in keys:
          path = '.'.join([path_segment, key])
          value = self.value_from_spec({'path': path}, raw)
          # Note that this flattens the namespace
          attrs.append(MappedAttribute(type='provider', name=key, value=value))
    else:
      raise GFInternal(f'unknown attr_name {attr_name}')
  return attrs

def map_import(db: Session, import_job_id: int, partition: str,
               spec: ImportSpec):
  import_job = db.query(ImportJob).get(import_job_id)
  if import_job is None:
    raise GFInternal('Lost ImportJob')
  ps = PathStack.from_import_job(import_job)
  mapper = _get_mapper(import_job)
  gate = service_gate(spec)
  for path, account in account_paths_for_import(db, import_job):
    uri_fn = get_arn_fn(account.scope, partition)
    ps = PathStack.from_import_job(import_job).scope(account.scope)
    map_resource_prefix(db, import_job, ps.path(), mapper, uri_fn)
    boto = None
    proxy = None
    if gate('iam') is not None:
      boto = load_boto_session(account)
      proxy = Proxy.build(boto)
      synthesize_account_root(proxy, db, import_job, ps.path(), account.scope,
                              partition)
    ec2_spec = gate('ec2')
    if ec2_spec is not None and resource_gate(ec2_spec, 'Images'):
      # Additional ec2 work
      if boto is None or proxy is None:
        boto = load_boto_session(account)
        proxy = Proxy.build(boto)
      adjunct_writer = db_import_writer(db,
                                        import_job.id,
                                        import_job.provider_account_id,
                                        'ec2',
                                        phase=1,
                                        source='base')
      find_adjunct_data(db, proxy, adjunct_writer, import_job, ps, import_job)
    logs_spec = gate('logs')
    if logs_spec is not None and resource_gate(logs_spec, 'ResourcePolicies'):
      if boto is None or proxy is None:
        boto = load_boto_session(account)
        proxy = Proxy.build(boto)
      region_cache = RegionCache(boto, partition)
      adjunct_writer = db_import_writer(db,
                                        import_job.id,
                                        import_job.provider_account_id,
                                        'logs',
                                        phase=1,
                                        source='logspolicies')
      add_logs_resource_policies(db, proxy, region_cache, adjunct_writer,
                                 import_job, ps, account.scope)
    for source in AWS_SOURCES:
      map_partial_prefix(db, mapper, import_job, source, ps.path(), uri_fn)
      map_partial_deletes(db, import_job, ps.path(), source, spec)
    # Re-map anything we've added
    map_resource_prefix(db, import_job, ps.path(), mapper, uri_fn)
    # Handle deletes
    map_resource_deletes(db, ps.path(), import_job, spec)
    found_relations = map_resource_relations(db, import_job, ps.path(), mapper,
                                             uri_fn)
    map_relation_deletes(db, import_job, ps.path(), found_relations, spec)

def list(self, key: str, kwargs: Dict[str, Any],
         retry_on_throttle: bool) -> Optional[Tuple[str, Any]]:
  prefix = len(key.split('_')[0])
  resource_name = self._client._PY_TO_OP_NAME[key][prefix:]
  extra_kwargs = dict(self._list_args(key), **kwargs)
  try:
    if self._client.can_paginate(key):
      paginator = self._client.get_paginator(key)
      method_args = dict(self._paginate_args(key), **extra_kwargs)
      iterator = paginator.paginate(**method_args)
      result = iterator.build_full_result()
    else:
      op = self._client.meta.method_to_api_mapping[key]
      op_model = self._client.meta.service_model.operation_model(op)
      output = op_model.output_shape
      attr = getattr(self._client, key)
      full_result = attr(**extra_kwargs)
      result = {
          result_key: full_result[result_key]
          for result_key in output.members.keys() if result_key in full_result
      }
    return resource_name, result
  except KeyError:
    raise GFInternal(
        f'Pagination exception raised for {self._client._PY_TO_OP_NAME[key]}')
  except botocore.exceptions.ParamValidationError as e:
    # TODO: fix this
    _log.debug(f'{key} Needs param input {str(e)}')
    return resource_name, {ERROR_KEY: 'needs param input'}
  except botocore.exceptions.ClientError as e:
    code = e.response.get('Error', {}).get('Code')
    if code == 'UnsupportedOperation':
      _log.info(f'{resource_name} Not supported in this region')
      return resource_name, {ERROR_KEY: 'unsupported in region'}
    elif code == 'MissingParameter':
      return resource_name, {ERROR_KEY: 'missing parameter'}
    elif code == 'OptInRequired':
      return resource_name, {ERROR_KEY: 'missing opt-in'}
    elif code in ('AuthFailure', 'AccessDeniedException',
                  'UnauthorizedOperation', 'AccessDenied'):
      _log.warning(f'Missing permissions for {self._service} {key}')
      return resource_name, {ERROR_KEY: 'auth failure'}
    elif code == 'InvalidClientTokenId':
      return resource_name, {ERROR_KEY: 'invalid token'}
    elif code is not None and code.startswith('NoSuch'):
      # No results, return nothing
      return None
    elif code in ('Throttling', 'ThrottlingException'):
      if retry_on_throttle:
        _log.warning(f'Throttled for {key}, retrying')
        time.sleep(3)
        return self.list(key, kwargs, retry_on_throttle=False)
      else:
        _log.error(f'Throttled for {key}, not retrying')
        raise e
    else:
      mapped = self._map_error_code(code, resource_name)
      if mapped is not None:
        return resource_name, {ERROR_KEY: mapped}
      raise e

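# Hedged walkthrough (not in the original source): for key =
# 'describe_instances', botocore's _PY_TO_OP_NAME maps it to
# 'DescribeInstances'; stripping the verb prefix (len('describe') == 8)
# leaves resource_name = 'Instances'. On a Throttling error the call sleeps
# and retries exactly once, since the retry passes retry_on_throttle=False.
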
def _path_to_ou(self, ou_id: str) -> str:
  for path, ous in self._graph['organizational_units'].items():
    for ou in ous:
      if ou['Id'] == ou_id:
        return path.split('/')[-1]
  raise GFInternal('Could not find organizational unit')

def report_for_import(db: Session, import_job: ImportJob) -> Report:
  # TODO: import status
  counts = db.execute(
      '''
      SELECT change_type, COUNT(*) AS cnt
      FROM resource_delta
      WHERE import_job_id = :import_job_id
      GROUP BY change_type
      ''', {'import_job_id': import_job.id})
  resources_added = 0
  resources_updated = 0
  resources_deleted = 0
  for row in counts:
    change_type = row['change_type']
    count = row['cnt']
    if change_type == 'add':
      resources_added = count
    elif change_type == 'update':
      resources_updated = count
    elif change_type == 'delete':
      resources_deleted = count
    else:
      raise GFInternal(f'Unknown resource_delta.change_type {change_type}')
  attr_counts = db.execute(
      '''
      SELECT RAD.change_type, COUNT(*) AS cnt
      FROM resource_attribute_delta AS RAD
        LEFT JOIN resource_delta AS RD
          ON RD.id = RAD.resource_delta_id
      WHERE RD.change_type = 'update'
        AND RD.import_job_id = :import_job_id
      GROUP BY RAD.change_type
      ''', {'import_job_id': import_job.id})
  attrs_added = 0
  attrs_updated = 0
  attrs_deleted = 0
  for row in attr_counts:
    change_type = row['change_type']
    count = row['cnt']
    if change_type == 'add':
      attrs_added = count
    elif change_type == 'update':
      attrs_updated = count
    elif change_type == 'delete':
      attrs_deleted = count
    else:
      raise GFInternal(
          f'Unknown resource_attribute_delta.change_type {change_type}')
  return Report(resources_added=resources_added,
                resources_updated=resources_updated,
                resources_deleted=resources_deleted,
                attrs_added=attrs_added,
                attrs_updated=attrs_updated,
                attrs_deleted=attrs_deleted,
                start=import_job.start_date,
                end=import_job.end_date,
                errors=import_job.error_details)

def _path_to_account(self, account_id: str) -> str:
  for path, accounts in self._graph['accounts'].items():
    for account in accounts:
      if account['Id'] == account_id:
        return path.split('/')[-1]
  raise GFInternal('Could not find account')