def write_graph_set(
    self, name: str, graph_set: ValidatedGraphSet, compression: Optional[str] = None
) -> str:
    """Write a graph artifact to the local filesystem.

    Args:
        name: artifact name, used as the file stem
        graph_set: ValidatedGraphSet object to write
        compression: None for plain RDF, GZIP for gzipped RDF

    Returns:
        path to written artifact
    """
    logger = Logger()
    os.makedirs(self.output_dir, exist_ok=True)
    if compression is None:
        artifact_path = os.path.join(self.output_dir, f"{name}.rdf")
    elif compression == GZIP:
        artifact_path = os.path.join(self.output_dir, f"{name}.rdf.gz")
    else:
        raise ValueError(f"Unknown compression arg {compression}")
    graph = graph_set.to_rdf()
    with logger.bind(artifact_path=artifact_path):
        logger.info(event=LogEvent.WriteToFSStart)
        with open(artifact_path, "wb") as fp:
            if compression is None:
                graph.serialize(fp)
            elif compression == GZIP:
                with gzip.GzipFile(fileobj=fp, mode="wb") as gz:
                    graph.serialize(gz)
            else:
                raise ValueError(f"Unknown compression arg {compression}")
        logger.info(event=LogEvent.WriteToFSEnd)
    return artifact_path
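# Standalone sketch of the gzip round trip used above, with plain rdflib and no
# writer class involved; serializing as RDF/XML is an assumption suggested by
# the ".rdf" extension, not confirmed by the source.
import gzip
import io

from rdflib import Graph, Literal, URIRef


def _gzip_round_trip_sketch() -> None:
    graph = Graph()
    graph.add((URIRef("test-name:123"), URIRef("test-name:has-foo"), Literal("goo")))
    buf = io.BytesIO()
    # Serialize through a GzipFile wrapper, mirroring the compression branch
    # of write_graph_set above.
    with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
        graph.serialize(gz, format="xml")
    # Read it back: decompress, then re-parse.
    buf.seek(0)
    round_tripped = Graph()
    with gzip.GzipFile(fileobj=buf, mode="rb") as gz:
        round_tripped.parse(gz, format="xml")
    assert len(round_tripped) == 1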
def test_orphaned_ref(self):
    # resource_b1 points at resource id "456" via a ResourceLink, but no
    # resource with that id exists in the graph set, so validation must raise.
    resource_a1 = Resource(
        resource_id="123",
        type="test:a",
        link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
    )
    resource_b1 = Resource(
        resource_id="abc",
        type="test:b",
        link_collection=LinkCollection(resource_links=[ResourceLink(pred="has-a", obj="456")]),
    )
    resources = [resource_a1, resource_b1]
    graph_set = GraphSet(
        name="test-name",
        version="1",
        start_time=1234,
        end_time=4567,
        resources=resources,
        errors=["test err 1", "test err 2"],
    )
    with self.assertRaises(GraphSetOrphanedReferencesException):
        ValidatedGraphSet.from_graph_set(graph_set)
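# For contrast, a hedged sketch of the resolving case: same fixture style as the
# test above, but here the ResourceLink target "456" exists, so validation
# should pass. This test is illustrative, not part of the original suite.
def test_resolved_ref_sketch(self):
    resource_a1 = Resource(resource_id="456", type="test:a", link_collection=LinkCollection())
    resource_b1 = Resource(
        resource_id="abc",
        type="test:b",
        link_collection=LinkCollection(resource_links=[ResourceLink(pred="has-a", obj="456")]),
    )
    graph_set = GraphSet(
        name="test-name",
        version="1",
        start_time=1234,
        end_time=4567,
        resources=[resource_a1, resource_b1],
        errors=[],
    )
    # No GraphSetOrphanedReferencesException expected here.
    validated = ValidatedGraphSet.from_graph_set(graph_set)
    self.assertEqual(len(validated.resources), 2)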
def test_invalid_resources_dupes_same_class_conflicting_types_no_allow_clobber(self):
    # Two resources share an id but declare conflicting types, which cannot
    # be merged.
    resources = [
        Resource(resource_id="123", type="test:a", link_collection=LinkCollection()),
        Resource(resource_id="123", type="test:b", link_collection=LinkCollection()),
    ]
    with self.assertRaises(UnmergableDuplicateResourceIdsFoundException):
        ValidatedGraphSet(
            name="test-name",
            version="1",
            start_time=1234,
            end_time=4567,
            resources=resources,
            errors=[],
        )
def test_unknown_type_name(self):
    # "test:c" has no registered ResourceSpec class, so validation cannot
    # resolve it.
    resources = [
        Resource(resource_id="xyz", type="test:a", link_collection=LinkCollection()),
        Resource(resource_id="xyz", type="test:c", link_collection=LinkCollection()),
    ]
    with self.assertRaises(ResourceSpecClassNotFoundException):
        ValidatedGraphSet(
            name="test-name",
            version="1",
            start_time=1234,
            end_time=4567,
            resources=resources,
            errors=[],
        )
class TestGraphSetWithValidDataMerging(TestCase):
    def setUp(self):
        resource_a1 = Resource(
            resource_id="123",
            type="test:a",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
        )
        resource_a2 = Resource(
            resource_id="123",
            type="test:a",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-goo", obj="foo")]),
        )
        resource_b1 = Resource(
            resource_id="abc",
            type="test:b",
            link_collection=LinkCollection(resource_links=[ResourceLink(pred="has-a", obj="123")]),
        )
        resource_b2 = Resource(
            resource_id="def",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="name", obj="sue")]),
        )
        resources = (resource_a1, resource_a2, resource_b1, resource_b2)
        self.validated_graph_set = ValidatedGraphSet(
            name="test-name",
            version="1",
            start_time=1234,
            end_time=4567,
            resources=resources,
            errors=["test err 1", "test err 2"],
        )

    def test_rdf_a_type(self):
        graph = self.validated_graph_set.to_rdf()
        a_results = graph.query(
            "select ?p ?o where {?s a <test-name:test:a> ; ?p ?o} order by ?p ?o"
        )
        expected_a_result_tuples = [
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:a"),
            ("test-name:has-foo", "goo"),
            ("test-name:has-goo", "foo"),
            ("test-name:id", "123"),
        ]
        a_result_tuples = []
        for a_result in a_results:
            self.assertEqual(2, len(a_result))
            a_result_tuples.append((str(a_result[0]), str(a_result[1])))
        self.assertEqual(expected_a_result_tuples, a_result_tuples)
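    # A hedged companion check for the merging fixture above: after validation
    # the two "123" resources should collapse into one resource carrying both
    # simple links. The resource ordering and link ordering asserted here are
    # assumptions (input order), not confirmed by the original tests.
    def test_merged_links_sketch(self):
        self.assertEqual(len(self.validated_graph_set.resources), 3)
        merged_a = self.validated_graph_set.resources[0]
        self.assertEqual(merged_a.resource_id, "123")
        self.assertEqual(
            merged_a.link_collection.simple_links,
            (SimpleLink(pred="has-foo", obj="goo"), SimpleLink(pred="has-goo", obj="foo")),
        )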
class TestGraphSetWithValidDataNoMerging(TestCase):
    def setUp(self):
        resource_a1 = Resource(
            resource_id="123",
            type="test:a",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
        )
        resource_a2 = Resource(resource_id="456", type="test:a", link_collection=LinkCollection())
        resource_b1 = Resource(
            resource_id="abc",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-a", obj="123")]),
        )
        resource_b2 = Resource(
            resource_id="def",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="name", obj="sue")]),
        )
        resources = (resource_a1, resource_a2, resource_b1, resource_b2)
        self.validated_graph_set = ValidatedGraphSet(
            name="test-name",
            version="1",
            start_time=1234,
            end_time=4567,
            resources=resources,
            errors=["test err 1", "test err 2"],
        )

    def test_rdf_a_type(self):
        graph = self.validated_graph_set.to_rdf()
        a_results = graph.query(
            "select ?p ?o where {?s a <test-name:test:a> ; ?p ?o} order by ?p ?o"
        )
        expected_a_result_tuples = [
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:a"),
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:a"),
            ("test-name:has-foo", "goo"),
            ("test-name:id", "123"),
            ("test-name:id", "456"),
        ]
        a_result_tuples = []
        for a_result in a_results:
            self.assertEqual(2, len(a_result))
            a_result_tuples.append((str(a_result[0]), str(a_result[1])))
        self.assertEqual(expected_a_result_tuples, a_result_tuples)

    def test_rdf_b_type(self):
        graph = self.validated_graph_set.to_rdf()
        graph.serialize("/tmp/test.rdf", format="xml")
        linked_a_node_results = graph.query(
            "select ?s where {?s a <test-name:test:a>; <test-name:id> '123' }"
        )
        self.assertEqual(len(linked_a_node_results), 1)
        for linked_a_node_result in linked_a_node_results:
            linked_a_node = str(linked_a_node_result[0])
        b_results = graph.query(
            "select ?p ?o where {?s a <test-name:test:b> ; ?p ?o} order by ?p ?o"
        )
        expected_b_result_tuples = [
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:b"),
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:b"),
            ("test-name:has-a", str(linked_a_node)),
            ("test-name:id", "abc"),
            ("test-name:id", "def"),
            ("test-name:name", "sue"),
        ]
        b_result_tuples = []
        for b_result in b_results:
            self.assertEqual(2, len(b_result))
            b_result_tuples.append((str(b_result[0]), str(b_result[1])))
        self.assertEqual(expected_b_result_tuples, b_result_tuples)

    def test_rdf_error_graphing(self):
        graph = self.validated_graph_set.to_rdf()
        err_results = graph.query("select ?o where { ?s <test-name:error> ?o } order by ?o")
        err_strs = []
        expected_err_strs = ["test err 1", "test err 2"]
        for err_result in err_results:
            self.assertEqual(1, len(err_result))
            err_strs.append(str(err_result[0]))
        self.assertEqual(err_strs, expected_err_strs)

    def test_graph_content(self):
        expected_resources = (
            Resource(
                resource_id="123",
                type="test:a",
                link_collection=LinkCollection(
                    simple_links=(SimpleLink(pred="has-foo", obj="goo"),)
                ),
            ),
            Resource(resource_id="456", type="test:a", link_collection=LinkCollection()),
            Resource(
                resource_id="abc",
                type="test:b",
                link_collection=LinkCollection(
                    simple_links=(SimpleLink(pred="has-a", obj="123"),)
                ),
            ),
            Resource(
                resource_id="def",
                type="test:b",
                link_collection=LinkCollection(simple_links=(SimpleLink(pred="name", obj="sue"),)),
            ),
        )
        expected_errors = ["test err 1", "test err 2"]
        self.assertEqual(self.validated_graph_set.resources, expected_resources)
        self.assertEqual(self.validated_graph_set.errors, expected_errors)
def test_valid_merge(self):
    resource_a1 = Resource(
        resource_id="123",
        type="test:a",
        link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
    )
    resource_a2 = Resource(resource_id="456", type="test:a", link_collection=LinkCollection())
    resource_b1 = Resource(
        resource_id="abc",
        type="test:b",
        link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-a", obj="123")]),
    )
    resource_b2 = Resource(
        resource_id="def",
        type="test:b",
        link_collection=LinkCollection(simple_links=[SimpleLink(pred="name", obj="sue")]),
    )
    graph_set_1 = GraphSet(
        name="graph-1",
        version="1",
        start_time=10,
        end_time=20,
        resources=[resource_a1, resource_a2],
        errors=["errora1", "errora2"],
    )
    graph_set_2 = GraphSet(
        name="graph-1",
        version="1",
        start_time=15,
        end_time=25,
        resources=[resource_b1, resource_b2],
        errors=["errorb1", "errorb2"],
    )
    merged_graph_set = ValidatedGraphSet.from_graph_sets([graph_set_1, graph_set_2])
    # The merged set spans the earliest start to the latest end and carries
    # all resources and errors from both inputs.
    self.assertEqual(merged_graph_set.name, "graph-1")
    self.assertEqual(merged_graph_set.version, "1")
    self.assertEqual(merged_graph_set.start_time, 10)
    self.assertEqual(merged_graph_set.end_time, 25)
    expected_resources = (
        Resource(
            resource_id="123",
            type="test:a",
            link_collection=LinkCollection(
                simple_links=(SimpleLink(pred="has-foo", obj="goo"),)
            ),
        ),
        Resource(resource_id="456", type="test:a", link_collection=LinkCollection()),
        Resource(
            resource_id="abc",
            type="test:b",
            link_collection=LinkCollection(
                simple_links=(SimpleLink(pred="has-a", obj="123"),)
            ),
        ),
        Resource(
            resource_id="def",
            type="test:b",
            link_collection=LinkCollection(simple_links=(SimpleLink(pred="name", obj="sue"),)),
        ),
    )
    expected_errors = ["errora1", "errora2", "errorb1", "errorb2"]
    self.assertCountEqual(merged_graph_set.resources, expected_resources)
    self.assertCountEqual(merged_graph_set.errors, expected_errors)
def run_scan(
    muxer: AWSScanMuxer,
    config: AWSConfig,
    aws_resource_region_mapping_repo: AWSResourceRegionMappingRepository,
    artifact_writer: ArtifactWriter,
    artifact_reader: ArtifactReader,
) -> Tuple[ScanManifest, ValidatedGraphSet]:
    if config.scan.accounts:
        scan_account_ids = config.scan.accounts
    else:
        # No accounts configured: default to the caller's own account.
        sts_client = boto3.client("sts")
        scan_account_id = sts_client.get_caller_identity()["Account"]
        scan_account_ids = (scan_account_id,)
    if config.scan.scan_sub_accounts:
        account_ids = get_sub_account_ids(scan_account_ids, config.accessor)
    else:
        account_ids = scan_account_ids
    scan_plan = ScanPlan(
        account_ids=account_ids,
        regions=config.scan.regions,
        aws_resource_region_mapping_repo=aws_resource_region_mapping_repo,
        accessor=config.accessor,
    )
    logger = Logger()
    logger.info(event=AWSLogEvents.ScanAWSAccountsStart)
    # Combine the per-account scan results into a ScanManifest.
    scanned_accounts: List[str] = []
    artifacts: List[str] = []
    errors: Dict[str, List[str]] = {}
    unscanned_accounts: Set[str] = set()
    graph_sets: List[GraphSet] = []
    for account_scan_manifest in muxer.scan(scan_plan=scan_plan):
        account_id = account_scan_manifest.account_id
        if account_scan_manifest.errors:
            errors[account_id] = account_scan_manifest.errors
            unscanned_accounts.add(account_id)
        if account_scan_manifest.artifacts:
            for account_scan_artifact in account_scan_manifest.artifacts:
                artifacts.append(account_scan_artifact)
                artifact_graph_set_dict = artifact_reader.read_json(account_scan_artifact)
                graph_sets.append(GraphSet.parse_obj(artifact_graph_set_dict))
            scanned_accounts.append(account_id)
        else:
            unscanned_accounts.add(account_id)
    if not graph_sets:
        raise Exception("BUG: No graph_sets generated.")
    validated_graph_set = ValidatedGraphSet.from_graph_set(GraphSet.from_graph_sets(graph_sets))
    master_artifact_path: Optional[str] = None
    if config.write_master_json:
        master_artifact_path = artifact_writer.write_json(name="master", data=validated_graph_set)
    logger.info(event=AWSLogEvents.ScanAWSAccountsEnd)
    start_time = validated_graph_set.start_time
    end_time = validated_graph_set.end_time
    scan_manifest = ScanManifest(
        scanned_accounts=scanned_accounts,
        master_artifact=master_artifact_path,
        artifacts=artifacts,
        errors=errors,
        unscanned_accounts=list(unscanned_accounts),
        start_time=start_time,
        end_time=end_time,
    )
    artifact_writer.write_json("manifest", data=scan_manifest)
    return scan_manifest, validated_graph_set
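# Minimal sketch of the artifact-ingestion step above: GraphSet appears to be a
# pydantic model (parse_obj), so a dict read back from a JSON artifact can be
# rehydrated directly. The field names here are an assumption, taken from the
# constructor arguments used throughout this section.
artifact_graph_set_dict = {
    "name": "test-name",
    "version": "1",
    "start_time": 1234,
    "end_time": 4567,
    "resources": [],
    "errors": [],
}
graph_set = GraphSet.parse_obj(artifact_graph_set_dict)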
def write_graph_set(
    self, name: str, graph_set: ValidatedGraphSet, compression: Optional[str] = None
) -> str:
    """Write a graph artifact to S3.

    Args:
        name: artifact name, used as the key stem
        graph_set: ValidatedGraphSet to write
        compression: None for plain RDF, GZIP for gzipped RDF

    Returns:
        S3 URI of the written artifact
    """
    logger = Logger()
    if compression is None:
        key = f"{name}.rdf"
    elif compression == GZIP:
        key = f"{name}.rdf.gz"
    else:
        raise ValueError(f"Unknown compression arg {compression}")
    output_key = "/".join((self.key_prefix, key))
    graph = graph_set.to_rdf()
    with logger.bind(bucket=self.bucket, key_prefix=self.key_prefix, key=key):
        logger.info(event=LogEvent.WriteToS3Start)
        with io.BytesIO() as rdf_bytes_buf:
            if compression is None:
                graph.serialize(rdf_bytes_buf)
            elif compression == GZIP:
                with gzip.GzipFile(fileobj=rdf_bytes_buf, mode="wb") as gz:
                    graph.serialize(gz)
            else:
                raise ValueError(f"Unknown compression arg {compression}")
            rdf_bytes_buf.flush()
            rdf_bytes_buf.seek(0)
            session = boto3.Session()
            s3_client = session.client("s3")
            s3_client.upload_fileobj(rdf_bytes_buf, self.bucket, output_key)
        # Tag the object with graph metadata so artifacts are identifiable
        # without downloading them.
        s3_client.put_object_tagging(
            Bucket=self.bucket,
            Key=output_key,
            Tagging={
                "TagSet": [
                    {"Key": "name", "Value": graph_set.name},
                    {"Key": "version", "Value": graph_set.version},
                    {"Key": "start_time", "Value": str(graph_set.start_time)},
                    {"Key": "end_time", "Value": str(graph_set.end_time)},
                ]
            },
        )
        logger.info(event=LogEvent.WriteToS3End)
    return f"s3://{self.bucket}/{output_key}"
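# A quick, hedged way to verify the tags written above, using the standard
# boto3 get_object_tagging call; the bucket and key below are placeholders.
import boto3

s3_client = boto3.client("s3")
resp = s3_client.get_object_tagging(Bucket="my-bucket", Key="prefix/master.rdf.gz")
tags = {tag["Key"]: tag["Value"] for tag in resp["TagSet"]}
assert {"name", "version", "start_time", "end_time"} <= tags.keys()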