コード例 #1
0
 def setUp(self):
     """Build a ValidatedGraphSet holding two test:a and two test:b resources."""
     a_with_link = Resource(
         resource_id="123",
         type="test:a",
         link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
     )
     a_bare = Resource(resource_id="456", type="test:a", link_collection=LinkCollection())
     b_with_link = Resource(
         resource_id="abc",
         type="test:b",
         link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-a", obj="123")]),
     )
     b_named = Resource(
         resource_id="def",
         type="test:b",
         link_collection=LinkCollection(simple_links=[SimpleLink(pred="name", obj="sue")]),
     )
     self.validated_graph_set = ValidatedGraphSet(
         name="test-name",
         version="1",
         start_time=1234,
         end_time=4567,
         resources=(a_with_link, a_bare, b_with_link, b_named),
         errors=["test err 1", "test err 2"],
     )
コード例 #2
0
    def write_graph_set(self,
                        name: str,
                        graph_set: ValidatedGraphSet,
                        compression: Optional[str] = None) -> str:
        """Write a graph artifact to the local filesystem.

        Args:
            name: base name of the artifact; the file extension is derived
                from the compression argument
            graph_set: ValidatedGraphSet object to write
            compression: None for plain rdf output, or GZIP for gzip output

        Returns:
            path to written artifact

        Raises:
            ValueError: if compression is not None or GZIP
        """
        logger = Logger()
        # Validate compression up front so we fail before touching the filesystem.
        if compression is None:
            extension = "rdf"
        elif compression == GZIP:
            extension = "rdf.gz"
        else:
            raise ValueError(f"Unknown compression arg {compression}")
        os.makedirs(self.output_dir, exist_ok=True)
        artifact_path = os.path.join(self.output_dir, f"{name}.{extension}")
        graph = graph_set.to_rdf()
        with logger.bind(artifact_path=artifact_path):
            logger.info(event=LogEvent.WriteToFSStart)
            with open(artifact_path, "wb") as fp:
                if compression is None:
                    graph.serialize(fp)
                else:  # compression == GZIP; already validated above
                    with gzip.GzipFile(fileobj=fp, mode="wb") as gz:
                        graph.serialize(gz)
            logger.info(event=LogEvent.WriteToFSEnd)
        return artifact_path
コード例 #3
0
 def test_orphaned_ref(self):
     """A ResourceLink pointing at a nonexistent resource id fails validation."""
     valid_resource = Resource(
         resource_id="123",
         type="test:a",
         link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
     )
     # "456" does not match any resource id in this graph set.
     referring_resource = Resource(
         resource_id="abc",
         type="test:b",
         link_collection=LinkCollection(
             resource_links=[ResourceLink(pred="has-a", obj="456")]
         ),
     )
     graph_set = GraphSet(
         name="test-name",
         version="1",
         start_time=1234,
         end_time=4567,
         resources=[valid_resource, referring_resource],
         errors=["test err 1", "test err 2"],
     )
     with self.assertRaises(GraphSetOrphanedReferencesException):
         ValidatedGraphSet.from_graph_set(graph_set)
コード例 #4
0
 def test_invalid_resources_dupes_same_class_conflicting_types_no_allow_clobber(self):
     """Duplicate resource ids with conflicting types cannot be merged."""
     conflicting = [
         Resource(resource_id="123", type=resource_type, link_collection=LinkCollection())
         for resource_type in ("test:a", "test:b")
     ]
     with self.assertRaises(UnmergableDuplicateResourceIdsFoundException):
         ValidatedGraphSet(
             name="test-name",
             version="1",
             start_time=1234,
             end_time=4567,
             resources=conflicting,
             errors=[],
         )
コード例 #5
0
 def test_unknown_type_name(self):
     """A resource type with no registered ResourceSpec class raises."""
     # "test:c" is expected to have no corresponding spec class.
     resources = [
         Resource(resource_id="xyz", type=type_name, link_collection=LinkCollection())
         for type_name in ("test:a", "test:c")
     ]
     with self.assertRaises(ResourceSpecClassNotFoundException):
         ValidatedGraphSet(
             name="test-name",
             version="1",
             start_time=1234,
             end_time=4567,
             resources=resources,
             errors=[],
         )
コード例 #6
0
class TestGraphSetWithValidDataMerging(TestCase):
    """Exercises to_rdf on a ValidatedGraphSet containing mergeable duplicates."""

    def setUp(self):
        # Two test:a resources share id "123"; validation merges their links.
        mergeable_a = [
            Resource(
                resource_id="123",
                type="test:a",
                link_collection=LinkCollection(simple_links=[SimpleLink(pred=pred, obj=obj)]),
            )
            for pred, obj in (("has-foo", "goo"), ("has-goo", "foo"))
        ]
        linking_b = Resource(
            resource_id="abc",
            type="test:b",
            link_collection=LinkCollection(resource_links=[ResourceLink(pred="has-a", obj="123")]),
        )
        named_b = Resource(
            resource_id="def",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="name", obj="sue")]),
        )
        self.validated_graph_set = ValidatedGraphSet(
            name="test-name",
            version="1",
            start_time=1234,
            end_time=4567,
            resources=(*mergeable_a, linking_b, named_b),
            errors=["test err 1", "test err 2"],
        )

    def test_rdf_a_type(self):
        """The merged test:a node carries links from both duplicate resources."""
        graph = self.validated_graph_set.to_rdf()

        a_results = graph.query(
            "select ?p ?o where {?s a <test-name:test:a> ; ?p ?o} order by ?p ?o"
        )
        expected_a_result_tuples = [
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:a"),
            ("test-name:has-foo", "goo"),
            ("test-name:has-goo", "foo"),
            ("test-name:id", "123"),
        ]
        a_result_tuples = []
        for row in a_results:
            self.assertEqual(2, len(row))
            a_result_tuples.append((str(row[0]), str(row[1])))
        self.assertEqual(expected_a_result_tuples, a_result_tuples)
コード例 #7
0
class TestGraphSetWithValidDataNoMerging(TestCase):
    """Exercises to_rdf on a ValidatedGraphSet whose resources have distinct ids."""

    def setUp(self):
        resource_a1 = Resource(
            resource_id="123",
            type="test:a",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
        )
        resource_a2 = Resource(resource_id="456", type="test:a", link_collection=LinkCollection())
        resource_b1 = Resource(
            resource_id="abc",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-a", obj="123")]),
        )
        resource_b2 = Resource(
            resource_id="def",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="name", obj="sue")]),
        )
        resources = (resource_a1, resource_a2, resource_b1, resource_b2)
        self.validated_graph_set = ValidatedGraphSet(
            name="test-name",
            version="1",
            start_time=1234,
            end_time=4567,
            resources=resources,
            errors=["test err 1", "test err 2"],
        )

    def test_rdf_a_type(self):
        """Both test:a resources appear as distinct nodes in the rdf graph."""
        graph = self.validated_graph_set.to_rdf()

        a_results = graph.query(
            "select ?p ?o where {?s a <test-name:test:a> ; ?p ?o} order by ?p ?o"
        )
        expected_a_result_tuples = [
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:a"),
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:a"),
            ("test-name:has-foo", "goo"),
            ("test-name:id", "123"),
            ("test-name:id", "456"),
        ]
        a_result_tuples = []
        for a_result in a_results:
            self.assertEqual(2, len(a_result))
            a_result_tuples.append((str(a_result[0]), str(a_result[1])))
        self.assertEqual(expected_a_result_tuples, a_result_tuples)

    def test_rdf_b_type(self):
        """test:b nodes exist and their has-a links point at the '123' test:a node."""
        graph = self.validated_graph_set.to_rdf()
        # A leftover debug call `graph.serialize("/tmp/test.rdf", format="xml")`
        # was removed here: it wrote to a fixed path as a test side effect.
        linked_a_node_results = graph.query(
            "select ?s where {?s a <test-name:test:a>; <test-name:id> '123' }"
        )
        self.assertEqual(len(linked_a_node_results), 1)
        for linked_a_node_result in linked_a_node_results:
            linked_a_node = str(linked_a_node_result[0])
        b_results = graph.query(
            "select ?p ?o where {?s a <test-name:test:b> ; ?p ?o} order by ?p ?o"
        )
        expected_b_result_tuples = [
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:b"),
            ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "test-name:test:b"),
            ("test-name:has-a", str(linked_a_node)),
            ("test-name:id", "abc"),
            ("test-name:id", "def"),
            ("test-name:name", "sue"),
        ]
        b_result_tuples = []
        for b_result in b_results:
            self.assertEqual(2, len(b_result))
            b_result_tuples.append((str(b_result[0]), str(b_result[1])))
        self.assertEqual(expected_b_result_tuples, b_result_tuples)

    def test_rdf_error_graphing(self):
        """Graph set errors are emitted as <test-name:error> triples."""
        graph = self.validated_graph_set.to_rdf()

        err_results = graph.query("select ?o where { ?s <test-name:error> ?o } order by ?o")
        err_strs = []
        expected_err_strs = ["test err 1", "test err 2"]
        for err_result in err_results:
            self.assertEqual(1, len(err_result))
            err_strs.append(str(err_result[0]))
        self.assertEqual(err_strs, expected_err_strs)

    def test_graph_content(self):
        """Validation normalizes link containers to tuples but keeps all content."""
        expected_resources = (
            Resource(
                resource_id="123",
                type="test:a",
                link_collection=LinkCollection(
                    simple_links=(SimpleLink(pred="has-foo", obj="goo"),),
                ),
            ),
            Resource(resource_id="456", type="test:a", link_collection=LinkCollection()),
            Resource(
                resource_id="abc",
                type="test:b",
                link_collection=LinkCollection(
                    simple_links=(SimpleLink(pred="has-a", obj="123"),),
                ),
            ),
            Resource(
                resource_id="def",
                type="test:b",
                link_collection=LinkCollection(simple_links=(SimpleLink(pred="name", obj="sue"),)),
            ),
        )
        expected_errors = ["test err 1", "test err 2"]
        self.assertEqual(self.validated_graph_set.resources, expected_resources)
        self.assertEqual(self.validated_graph_set.errors, expected_errors)
コード例 #8
0
    def test_valid_merge(self):
        """Merging two GraphSets with matching name/version combines resources,
        errors, and the overall time window."""
        a_linked = Resource(
            resource_id="123",
            type="test:a",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-foo", obj="goo")]),
        )
        a_plain = Resource(resource_id="456", type="test:a", link_collection=LinkCollection())
        b_linked = Resource(
            resource_id="abc",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="has-a", obj="123")]),
        )
        b_named = Resource(
            resource_id="def",
            type="test:b",
            link_collection=LinkCollection(simple_links=[SimpleLink(pred="name", obj="sue")]),
        )
        first_set = GraphSet(
            name="graph-1",
            version="1",
            start_time=10,
            end_time=20,
            resources=[a_linked, a_plain],
            errors=["errora1", "errora2"],
        )
        second_set = GraphSet(
            name="graph-1",
            version="1",
            start_time=15,
            end_time=25,
            resources=[b_linked, b_named],
            errors=["errorb1", "errorb2"],
        )
        merged = ValidatedGraphSet.from_graph_sets([first_set, second_set])

        # Name/version carry over; the time window spans both input sets.
        self.assertEqual(merged.name, "graph-1")
        self.assertEqual(merged.version, "1")
        self.assertEqual(merged.start_time, 10)
        self.assertEqual(merged.end_time, 25)
        self.assertCountEqual(merged.errors, ["errora1", "errora2", "errorb1", "errorb2"])

        expected_resources = (
            Resource(
                resource_id="123",
                type="test:a",
                link_collection=LinkCollection(
                    simple_links=(SimpleLink(pred="has-foo", obj="goo"),),
                ),
            ),
            Resource(resource_id="456", type="test:a", link_collection=LinkCollection()),
            Resource(
                resource_id="abc",
                type="test:b",
                link_collection=LinkCollection(
                    simple_links=(SimpleLink(pred="has-a", obj="123"),),
                ),
            ),
            Resource(
                resource_id="def",
                type="test:b",
                link_collection=LinkCollection(simple_links=(SimpleLink(pred="name", obj="sue"),)),
            ),
        )
        expected_errors = ["errora1", "errora2", "errorb1", "errorb2"]

        self.assertCountEqual(merged.resources, expected_resources)
        self.assertCountEqual(merged.errors, expected_errors)
コード例 #9
0
def run_scan(
    muxer: AWSScanMuxer,
    config: AWSConfig,
    aws_resource_region_mapping_repo: AWSResourceRegionMappingRepository,
    artifact_writer: ArtifactWriter,
    artifact_reader: ArtifactReader,
) -> Tuple[ScanManifest, ValidatedGraphSet]:
    """Run an AWS scan via the muxer and write manifest/master artifacts.

    Args:
        muxer: AWSScanMuxer that fans the scan out across accounts
        config: AWSConfig controlling accounts, regions, and output options
        aws_resource_region_mapping_repo: resource-to-region mapping repository
        artifact_writer: writer used for the master and manifest artifacts
        artifact_reader: reader used to load per-account scan artifacts

    Returns:
        tuple of (ScanManifest, ValidatedGraphSet) describing the scan results

    Raises:
        RuntimeError: if no account scan produced a graph set
    """
    # Determine the seed account ids: explicit config, or the caller's identity.
    if config.scan.accounts:
        scan_account_ids = config.scan.accounts
    else:
        sts_client = boto3.client("sts")
        scan_account_id = sts_client.get_caller_identity()["Account"]
        scan_account_ids = (scan_account_id,)
    # Optionally expand the seed accounts to their sub-accounts.
    if config.scan.scan_sub_accounts:
        account_ids = get_sub_account_ids(scan_account_ids, config.accessor)
    else:
        account_ids = scan_account_ids
    scan_plan = ScanPlan(
        account_ids=account_ids,
        regions=config.scan.regions,
        aws_resource_region_mapping_repo=aws_resource_region_mapping_repo,
        accessor=config.accessor,
    )
    logger = Logger()
    logger.info(event=AWSLogEvents.ScanAWSAccountsStart)
    # now combine account_scan_results and org_details to build a ScanManifest
    scanned_accounts: List[str] = []
    artifacts: List[str] = []
    errors: Dict[str, List[str]] = {}
    unscanned_accounts: Set[str] = set()
    graph_sets: List[GraphSet] = []

    for account_scan_manifest in muxer.scan(scan_plan=scan_plan):
        account_id = account_scan_manifest.account_id
        # NOTE(review): an account with both errors and artifacts is counted as
        # scanned AND unscanned; this mirrors the original logic — confirm intended.
        if account_scan_manifest.errors:
            errors[account_id] = account_scan_manifest.errors
            unscanned_accounts.add(account_id)
        if account_scan_manifest.artifacts:
            for account_scan_artifact in account_scan_manifest.artifacts:
                artifacts.append(account_scan_artifact)
                artifact_graph_set_dict = artifact_reader.read_json(account_scan_artifact)
                graph_sets.append(GraphSet.parse_obj(artifact_graph_set_dict))
            scanned_accounts.append(account_id)
        else:
            unscanned_accounts.add(account_id)
    if not graph_sets:
        # Specific exception type instead of bare Exception; message preserved.
        raise RuntimeError("BUG: No graph_sets generated.")
    validated_graph_set = ValidatedGraphSet.from_graph_set(GraphSet.from_graph_sets(graph_sets))
    master_artifact_path: Optional[str] = None
    if config.write_master_json:
        master_artifact_path = artifact_writer.write_json(name="master", data=validated_graph_set)
    logger.info(event=AWSLogEvents.ScanAWSAccountsEnd)
    start_time = validated_graph_set.start_time
    end_time = validated_graph_set.end_time
    scan_manifest = ScanManifest(
        scanned_accounts=scanned_accounts,
        master_artifact=master_artifact_path,
        artifacts=artifacts,
        errors=errors,
        unscanned_accounts=list(unscanned_accounts),
        start_time=start_time,
        end_time=end_time,
    )
    artifact_writer.write_json("manifest", data=scan_manifest)
    return scan_manifest, validated_graph_set
コード例 #10
0
    def write_graph_set(self,
                        name: str,
                        graph_set: ValidatedGraphSet,
                        compression: Optional[str] = None) -> str:
        """Write a graph artifact to S3 and tag it with graph metadata.

        Args:
            name: base name of the artifact; the key extension is derived
                from the compression argument
            graph_set: ValidatedGraphSet to write
            compression: None for plain rdf output, or GZIP for gzip output

        Returns:
            s3 uri (s3://bucket/key) of the written artifact

        Raises:
            ValueError: if compression is not None or GZIP
        """
        logger = Logger()
        # Validate compression once, up front; the serialization branch below
        # can then rely on it being either None or GZIP.
        if compression is None:
            key = f"{name}.rdf"
        elif compression == GZIP:
            key = f"{name}.rdf.gz"
        else:
            raise ValueError(f"Unknown compression arg {compression}")
        output_key = "/".join((self.key_prefix, key))
        graph = graph_set.to_rdf()
        with logger.bind(bucket=self.bucket,
                         key_prefix=self.key_prefix,
                         key=key):
            logger.info(event=LogEvent.WriteToS3Start)
            # Serialize into an in-memory buffer, then stream the buffer to S3.
            with io.BytesIO() as rdf_bytes_buf:
                if compression is None:
                    graph.serialize(rdf_bytes_buf)
                else:  # compression == GZIP; already validated above
                    with gzip.GzipFile(fileobj=rdf_bytes_buf, mode="wb") as gz:
                        graph.serialize(gz)
                rdf_bytes_buf.flush()
                rdf_bytes_buf.seek(0)
                session = boto3.Session()
                s3_client = session.client("s3")
                s3_client.upload_fileobj(rdf_bytes_buf, self.bucket,
                                         output_key)
            # Tag the object so the graph can be identified without downloading it.
            s3_client.put_object_tagging(
                Bucket=self.bucket,
                Key=output_key,
                Tagging={
                    "TagSet": [
                        {"Key": "name", "Value": graph_set.name},
                        {"Key": "version", "Value": graph_set.version},
                        {"Key": "start_time", "Value": str(graph_set.start_time)},
                        {"Key": "end_time", "Value": str(graph_set.end_time)},
                    ]
                },
            )
            logger.info(event=LogEvent.WriteToS3End)
        return f"s3://{self.bucket}/{output_key}"