Ejemplo n.º 1
0
 def test_invalid_diff_versions(self):
     """Merging graph sets with the same name but different versions must raise."""
     version_one = GraphSet(
         name="graph-1",
         version="1",
         start_time=10,
         end_time=20,
         resources=[],
         errors=[],
     )
     version_two = GraphSet(
         name="graph-1",
         version="2",
         start_time=15,
         end_time=25,
         resources=[],
         errors=[],
     )
     mismatched_graph_sets = [version_one, version_two]
     # The only difference that should matter here is version "1" vs "2".
     self.assertRaises(
         UnmergableGraphSetsException,
         GraphSet.from_graph_sets,
         mismatched_graph_sets,
     )
Ejemplo n.º 2
0
 def scan(self) -> AccountScanResult:
     """Scan the account described by ``self.account_scan_plan``.

     Steps:
       1. Obtain a session for the account and verify via STS that the
          session really belongs to that account.
       2. Determine the regions to scan (the plan's regions, or all enabled
          regions when the plan lists none).
       3. Group the resource spec classes by region and service, then submit
          scans to a thread pool in randomized region/service order. Spec
          classes flagged ``parallel_scan`` get one scan each; the rest of a
          service's spec classes share a single scan.
       4. Collect the resulting graph sets, write one JSON artifact named
          after the account id and return the result.

     Any exception raised during steps 1-3 is logged and recorded as a
     "prescan" error rather than propagated. If there are prescan errors, or
     any collected graph set reports errors, the artifact contains only an
     unscanned-account resource carrying those errors.

     Returns:
         AccountScanResult with the account id, the written artifact and the
         list of errors (empty on a fully successful scan).
     """
     logger = Logger()
     now = int(time.time())
     prescan_errors: List[str] = []
     futures: List[Future] = []
     account_id = self.account_scan_plan.account_id
     with logger.bind(account_id=account_id):
         # Leaving the executor context waits for every submitted scan.
         with ThreadPoolExecutor(max_workers=self.max_threads) as executor:
             logger.info(event=AWSLogEvents.ScanAWSAccountStart)
             try:
                 session = self.account_scan_plan.accessor.get_session(
                     account_id=account_id)
                 # sanity check: the session must belong to the planned account
                 sts_client = session.client("sts")
                 sts_account_id = sts_client.get_caller_identity()["Account"]
                 if sts_account_id != account_id:
                     raise ValueError(
                         f"BUG: sts detected account_id {sts_account_id} != {account_id}"
                     )
                 if self.account_scan_plan.regions:
                     account_scan_regions = tuple(
                         self.account_scan_plan.regions)
                 else:
                     account_scan_regions = get_all_enabled_regions(
                         session=session)
                 # build a dict of regions -> services -> List[AWSResourceSpec]
                 regions_services_resource_spec_classes: DefaultDict[
                     str, DefaultDict[
                         str, List[Type[AWSResourceSpec]]]] = defaultdict(
                             lambda: defaultdict(list))
                 for resource_spec_class in self.resource_spec_classes:
                     resource_regions = self.account_scan_plan.aws_resource_region_mapping_repo.get_regions(
                         resource_spec_class=resource_spec_class,
                         region_whitelist=account_scan_regions,
                     )
                     for region in resource_regions:
                         regions_services_resource_spec_classes[region][
                             resource_spec_class.service_name].append(
                                 resource_spec_class)
                 # Build and submit ScanUnits in random order.
                 # random.sample() requires a sequence: passing a dict view is
                 # deprecated since Python 3.9 and raises TypeError from 3.11,
                 # so the items are materialized into a list first.
                 shuffled_regions_services_resource_spec_classes = random.sample(
                     list(regions_services_resource_spec_classes.items()),
                     len(regions_services_resource_spec_classes),
                 )
                 for (
                         region,
                         services_resource_spec_classes,
                 ) in shuffled_regions_services_resource_spec_classes:
                     region_session = self.account_scan_plan.accessor.get_session(
                         account_id=account_id, region_name=region)
                     region_creds = region_session.get_credentials()
                     shuffled_services_resource_spec_classes = random.sample(
                         list(services_resource_spec_classes.items()),
                         len(services_resource_spec_classes),
                     )
                     for (
                             service,
                             svc_resource_spec_classes,
                     ) in shuffled_services_resource_spec_classes:
                         parallel_svc_resource_spec_classes = [
                             svc_resource_spec_class
                             for svc_resource_spec_class in
                             svc_resource_spec_classes
                             if svc_resource_spec_class.parallel_scan
                         ]
                         serial_svc_resource_spec_classes = [
                             svc_resource_spec_class
                             for svc_resource_spec_class in
                             svc_resource_spec_classes
                             if not svc_resource_spec_class.parallel_scan
                         ]
                         # One scan per parallel-capable spec class ...
                         for (parallel_svc_resource_spec_class
                              ) in parallel_svc_resource_spec_classes:
                             parallel_future = schedule_scan(
                                 executor=executor,
                                 graph_name=self.graph_name,
                                 graph_version=self.graph_version,
                                 account_id=account_id,
                                 region_name=region,
                                 service=service,
                                 access_key=region_creds.access_key,
                                 secret_key=region_creds.secret_key,
                                 token=region_creds.token,
                                 resource_spec_classes=(
                                     parallel_svc_resource_spec_class, ),
                             )
                             futures.append(parallel_future)
                         # ... and a single scan covering all remaining spec
                         # classes of this service (submitted even when that
                         # tuple is empty, as before).
                         serial_future = schedule_scan(
                             executor=executor,
                             graph_name=self.graph_name,
                             graph_version=self.graph_version,
                             account_id=account_id,
                             region_name=region,
                             service=service,
                             access_key=region_creds.access_key,
                             secret_key=region_creds.secret_key,
                             token=region_creds.token,
                             resource_spec_classes=tuple(
                                 serial_svc_resource_spec_classes),
                         )
                         futures.append(serial_future)
             except Exception as ex:
                 # Deliberately broad: any setup/scheduling failure marks the
                 # whole account as unscanned instead of aborting the caller.
                 error_str = str(ex)
                 trace_back = traceback.format_exc()
                 logger.error(
                     event=AWSLogEvents.ScanAWSAccountError,
                     error=error_str,
                     trace_back=trace_back,
                 )
                 prescan_errors.append(f"{error_str}\n{trace_back}")
         # Scans submitted before a prescan failure are still drained here.
         graph_sets: List[GraphSet] = [
             future.result() for future in as_completed(futures)
         ]
         if prescan_errors:
             # a prescan error takes precedence over per-scan errors
             errors = prescan_errors
         else:
             errors = [
                 error
                 for graph_set in graph_sets
                 for error in graph_set.errors
             ]
         if errors:
             # if there are any errors whatsoever we generate an empty graph
             # with errors only
             unscanned_account_resource = UnscannedAccountResourceSpec.create_resource(
                 account_id=account_id, errors=errors)
             account_graph_set = GraphSet(
                 name=self.graph_name,
                 version=self.graph_version,
                 start_time=now,
                 end_time=now,
                 resources=[unscanned_account_resource],
                 errors=errors,
             )
         else:
             account_graph_set = GraphSet.from_graph_sets(graph_sets)
         output_artifact = self.artifact_writer.write_json(
             name=account_id,
             data=account_graph_set,
         )
         logger.info(event=AWSLogEvents.ScanAWSAccountEnd)
         return AccountScanResult(
             account_id=account_id,
             artifacts=[output_artifact],
             errors=errors,
         )
def run_scan(
    muxer: AWSScanMuxer,
    config: AWSConfig,
    aws_resource_region_mapping_repo: AWSResourceRegionMappingRepository,
    artifact_writer: ArtifactWriter,
    artifact_reader: ArtifactReader,
) -> Tuple[ScanManifest, ValidatedGraphSet]:
    """Run a scan over the configured accounts and build the scan manifest.

    Args:
        muxer: runs the scan plan and yields one result per account.
        config: scan configuration (accounts, regions, accessor, flags).
        aws_resource_region_mapping_repo: passed through to the ScanPlan.
        artifact_writer: writes the optional "master" graph and the
            "manifest" JSON artifacts.
        artifact_reader: reads back each per-account artifact.

    Returns:
        The ScanManifest that was written and the merged, validated graph set.

    Raises:
        Exception: if no account produced any graph set.
    """
    root_account_ids = config.scan.accounts
    if not root_account_ids:
        # No accounts configured: use the account of the ambient credentials.
        caller_identity = boto3.client("sts").get_caller_identity()
        root_account_ids = (caller_identity["Account"],)
    account_ids = (
        get_sub_account_ids(root_account_ids, config.accessor)
        if config.scan.scan_sub_accounts
        else root_account_ids
    )
    scan_plan = ScanPlan(
        account_ids=account_ids,
        regions=config.scan.regions,
        aws_resource_region_mapping_repo=aws_resource_region_mapping_repo,
        accessor=config.accessor,
    )
    logger = Logger()
    logger.info(event=AWSLogEvents.ScanAWSAccountsStart)

    # Accumulators used to build the ScanManifest from the per-account results.
    scanned_accounts: List[str] = []
    artifacts: List[str] = []
    errors: Dict[str, List[str]] = {}
    unscanned_accounts: Set[str] = set()
    graph_sets: List[GraphSet] = []

    for account_result in muxer.scan(scan_plan=scan_plan):
        result_account_id = account_result.account_id
        if account_result.errors:
            errors[result_account_id] = account_result.errors
            unscanned_accounts.add(result_account_id)
        if not account_result.artifacts:
            unscanned_accounts.add(result_account_id)
            continue
        # NOTE: an account reporting both errors and artifacts is recorded as
        # scanned *and* unscanned.
        for artifact_path in account_result.artifacts:
            artifacts.append(artifact_path)
            graph_set_dict = artifact_reader.read_json(artifact_path)
            graph_sets.append(GraphSet.parse_obj(graph_set_dict))
        scanned_accounts.append(result_account_id)

    if not graph_sets:
        raise Exception("BUG: No graph_sets generated.")

    merged_graph_set = GraphSet.from_graph_sets(graph_sets)
    validated_graph_set = ValidatedGraphSet.from_graph_set(merged_graph_set)
    master_artifact_path: Optional[str] = (
        artifact_writer.write_json(name="master", data=validated_graph_set)
        if config.write_master_json
        else None
    )
    logger.info(event=AWSLogEvents.ScanAWSAccountsEnd)

    scan_manifest = ScanManifest(
        scanned_accounts=scanned_accounts,
        master_artifact=master_artifact_path,
        artifacts=artifacts,
        errors=errors,
        unscanned_accounts=list(unscanned_accounts),
        start_time=validated_graph_set.start_time,
        end_time=validated_graph_set.end_time,
    )
    artifact_writer.write_json("manifest", data=scan_manifest)
    return scan_manifest, validated_graph_set