def test_invalid_diff_versions(self):
    """Merging graph sets that carry different versions must be rejected."""
    version_one_set = GraphSet(
        name="graph-1",
        version="1",
        start_time=10,
        end_time=20,
        resources=[],
        errors=[],
    )
    version_two_set = GraphSet(
        name="graph-1",
        version="2",
        start_time=15,
        end_time=25,
        resources=[],
        errors=[],
    )
    # Same name, different version: only the version mismatch is under test.
    mismatched_sets = [version_one_set, version_two_set]
    with self.assertRaises(UnmergableGraphSetsException):
        GraphSet.from_graph_sets(mismatched_sets)
def scan(self) -> AccountScanResult:
    """Scan the account named by ``self.account_scan_plan`` and write one artifact.

    Steps:

    1. Obtain a session for the account and verify via STS that the session
       really belongs to the planned account id.
    2. Determine the regions to scan (the plan's regions, or every enabled
       region when the plan lists none) and group ``self.resource_spec_classes``
       by region and service name.
    3. Submit scans to a thread pool in randomized region/service order: one
       per ``parallel_scan`` spec class, plus one per service covering the
       remaining (serial) spec classes.
    4. Collect the resulting graph sets, write a single JSON artifact named
       after the account id, and return it.

    Any exception raised while preparing or submitting scans is logged and
    recorded as a "prescan" error rather than propagated. If there are prescan
    errors, or any collected graph set reports errors, the artifact contains
    only an unscanned-account resource carrying those errors; otherwise it is
    the merge of all collected graph sets.

    Returns:
        An ``AccountScanResult`` with the account id, the written artifact
        and the list of errors (empty on full success).
    """
    logger = Logger()
    now = int(time.time())
    prescan_errors: List[str] = []
    futures: List[Future] = []
    account_id = self.account_scan_plan.account_id
    with logger.bind(account_id=account_id):
        with ThreadPoolExecutor(max_workers=self.max_threads) as executor:
            logger.info(event=AWSLogEvents.ScanAWSAccountStart)
            try:
                session = self.account_scan_plan.accessor.get_session(account_id=account_id)
                # Sanity check: the credentials must belong to the planned account.
                sts_client = session.client("sts")
                sts_account_id = sts_client.get_caller_identity()["Account"]
                if sts_account_id != account_id:
                    raise ValueError(
                        f"BUG: sts detected account_id {sts_account_id} != {account_id}"
                    )
                if self.account_scan_plan.regions:
                    account_scan_regions = tuple(self.account_scan_plan.regions)
                else:
                    account_scan_regions = get_all_enabled_regions(session=session)
                # Build a dict of regions -> services -> List[AWSResourceSpec]
                regions_services_resource_spec_classes: DefaultDict[
                    str, DefaultDict[str, List[Type[AWSResourceSpec]]]
                ] = defaultdict(lambda: defaultdict(list))
                for resource_spec_class in self.resource_spec_classes:
                    resource_regions = (
                        self.account_scan_plan.aws_resource_region_mapping_repo.get_regions(
                            resource_spec_class=resource_spec_class,
                            region_whitelist=account_scan_regions,
                        )
                    )
                    for region in resource_regions:
                        regions_services_resource_spec_classes[region][
                            resource_spec_class.service_name
                        ].append(resource_spec_class)
                # Build and submit ScanUnits in random order.
                # random.sample() needs a real sequence: dict views were
                # deprecated as a population in Python 3.9 and raise TypeError
                # from 3.11, so materialize the items first.
                shuffled_regions_services_resource_spec_classes = random.sample(
                    list(regions_services_resource_spec_classes.items()),
                    len(regions_services_resource_spec_classes),
                )
                for (
                    region,
                    services_resource_spec_classes,
                ) in shuffled_regions_services_resource_spec_classes:
                    region_session = self.account_scan_plan.accessor.get_session(
                        account_id=account_id, region_name=region
                    )
                    region_creds = region_session.get_credentials()
                    shuffled_services_resource_spec_classes = random.sample(
                        list(services_resource_spec_classes.items()),
                        len(services_resource_spec_classes),
                    )
                    for (
                        service,
                        svc_resource_spec_classes,
                    ) in shuffled_services_resource_spec_classes:
                        parallel_svc_resource_spec_classes = [
                            svc_resource_spec_class
                            for svc_resource_spec_class in svc_resource_spec_classes
                            if svc_resource_spec_class.parallel_scan
                        ]
                        serial_svc_resource_spec_classes = [
                            svc_resource_spec_class
                            for svc_resource_spec_class in svc_resource_spec_classes
                            if not svc_resource_spec_class.parallel_scan
                        ]
                        # Each parallel-capable spec class gets its own scan.
                        for parallel_svc_resource_spec_class in parallel_svc_resource_spec_classes:
                            parallel_future = schedule_scan(
                                executor=executor,
                                graph_name=self.graph_name,
                                graph_version=self.graph_version,
                                account_id=account_id,
                                region_name=region,
                                service=service,
                                access_key=region_creds.access_key,
                                secret_key=region_creds.secret_key,
                                token=region_creds.token,
                                resource_spec_classes=(parallel_svc_resource_spec_class,),
                            )
                            futures.append(parallel_future)
                        # The remaining spec classes of this service share one scan.
                        # NOTE(review): this is submitted even when the serial list
                        # is empty - confirm schedule_scan tolerates an empty tuple.
                        serial_future = schedule_scan(
                            executor=executor,
                            graph_name=self.graph_name,
                            graph_version=self.graph_version,
                            account_id=account_id,
                            region_name=region,
                            service=service,
                            access_key=region_creds.access_key,
                            secret_key=region_creds.secret_key,
                            token=region_creds.token,
                            resource_spec_classes=tuple(serial_svc_resource_spec_classes),
                        )
                        futures.append(serial_future)
            except Exception as ex:
                # Deliberately broad: any setup/submission failure is reported
                # in the result instead of aborting the scan.
                error_str = str(ex)
                trace_back = traceback.format_exc()
                logger.error(
                    event=AWSLogEvents.ScanAWSAccountError,
                    error=error_str,
                    trace_back=trace_back,
                )
                prescan_errors.append(f"{error_str}\n{trace_back}")
            # Scans submitted before a prescan failure are still awaited.
            graph_sets: List[GraphSet] = []
            for future in as_completed(futures):
                graph_sets.append(future.result())
            # Prescan errors take precedence; otherwise gather the errors
            # reported by the individual graph sets.
            errors: List[str]
            if prescan_errors:
                errors = prescan_errors
            else:
                errors = []
                for graph_set in graph_sets:
                    errors.extend(graph_set.errors)
            if errors:
                # If there are any errors whatsoever we generate an empty graph
                # with errors only.
                unscanned_account_resource = UnscannedAccountResourceSpec.create_resource(
                    account_id=account_id, errors=errors
                )
                account_graph_set = GraphSet(
                    name=self.graph_name,
                    version=self.graph_version,
                    start_time=now,
                    end_time=now,
                    resources=[unscanned_account_resource],
                    errors=errors,
                )
            else:
                account_graph_set = GraphSet.from_graph_sets(graph_sets)
            output_artifact = self.artifact_writer.write_json(
                name=account_id,
                data=account_graph_set,
            )
            logger.info(event=AWSLogEvents.ScanAWSAccountEnd)
            return AccountScanResult(
                account_id=account_id,
                artifacts=[output_artifact],
                errors=errors,
            )
def run_scan(
    muxer: AWSScanMuxer,
    config: AWSConfig,
    aws_resource_region_mapping_repo: AWSResourceRegionMappingRepository,
    artifact_writer: ArtifactWriter,
    artifact_reader: ArtifactReader,
) -> Tuple[ScanManifest, ValidatedGraphSet]:
    """Scan the configured accounts and merge the results into one validated graph set.

    The accounts come from ``config.scan.accounts``; when that is empty the
    account of the current STS caller identity is used. If
    ``config.scan.scan_sub_accounts`` is set, the list is expanded through
    ``get_sub_account_ids``. Every artifact reported by the muxer is read back
    and parsed into a ``GraphSet``; the merged, validated graph set is written
    as the "master" artifact when ``config.write_master_json`` is set, and a
    "manifest" artifact is always written.

    Returns:
        The ``ScanManifest`` and the merged ``ValidatedGraphSet``.

    Raises:
        Exception: if no graph set was produced by any account.
    """
    root_account_ids = config.scan.accounts
    if not root_account_ids:
        # No explicit accounts: fall back to the account of the active credentials.
        caller_identity = boto3.client("sts").get_caller_identity()
        root_account_ids = (caller_identity["Account"],)

    target_account_ids = (
        get_sub_account_ids(root_account_ids, config.accessor)
        if config.scan.scan_sub_accounts
        else root_account_ids
    )

    plan = ScanPlan(
        account_ids=target_account_ids,
        regions=config.scan.regions,
        aws_resource_region_mapping_repo=aws_resource_region_mapping_repo,
        accessor=config.accessor,
    )
    log = Logger()
    log.info(event=AWSLogEvents.ScanAWSAccountsStart)

    # Accumulators from which the ScanManifest is assembled.
    scanned_ok: List[str] = []
    artifact_refs: List[str] = []
    errors_by_account: Dict[str, List[str]] = {}
    not_scanned: Set[str] = set()
    collected_graph_sets: List[GraphSet] = []

    for account_result in muxer.scan(scan_plan=plan):
        account_id = account_result.account_id
        if account_result.errors:
            errors_by_account[account_id] = account_result.errors
            not_scanned.add(account_id)
        if not account_result.artifacts:
            not_scanned.add(account_id)
            continue
        for artifact_ref in account_result.artifacts:
            artifact_refs.append(artifact_ref)
            raw_graph_set = artifact_reader.read_json(artifact_ref)
            collected_graph_sets.append(GraphSet.parse_obj(raw_graph_set))
        scanned_ok.append(account_id)

    if not collected_graph_sets:
        raise Exception("BUG: No graph_sets generated.")

    merged_graph_set = GraphSet.from_graph_sets(collected_graph_sets)
    validated = ValidatedGraphSet.from_graph_set(merged_graph_set)

    master_ref: Optional[str] = None
    if config.write_master_json:
        master_ref = artifact_writer.write_json(name="master", data=validated)
    log.info(event=AWSLogEvents.ScanAWSAccountsEnd)

    manifest = ScanManifest(
        scanned_accounts=scanned_ok,
        master_artifact=master_ref,
        artifacts=artifact_refs,
        errors=errors_by_account,
        unscanned_accounts=list(not_scanned),
        start_time=validated.start_time,
        end_time=validated.end_time,
    )
    artifact_writer.write_json("manifest", data=manifest)
    return manifest, validated