Esempio n. 1
0
def build_data_structure(account_data, config, outputfilter):
    cytoscape_json = []

    account = Account(None, account_data)
    print("Building data for account {} ({})".format(account.name, account.local_id))

    cytoscape_json.append(account.cytoscape_data())
    for region_json in get_regions(account, outputfilter):
        node_count_per_region = 0
        region = Region(account, region_json)

        for vpc_json in get_vpcs(region, outputfilter):
            vpc = Vpc(region, vpc_json)

            for az_json in get_azs(vpc):
                # Availibility zones are not a per VPC construct, but VPC's can span AZ's,
                # so I make VPC a higher level construct
                az = Az(vpc, az_json)

                for subnet_json in get_subnets(az):
                    # If we ignore AZz, then tie the subnets up the VPC as the parent
                    if outputfilter["azs"]:
                        parent = az
                    else:
                        parent = vpc

                    subnet = Subnet(parent, subnet_json)

                    # Get EC2's
                    for ec2_json in get_ec2s(subnet):
                        ec2 = Ec2(subnet, ec2_json, outputfilter["collapse_by_tag"])
                        subnet.addChild(ec2)

                    # Get RDS's
                    for rds_json in get_rds_instances(subnet):
                        rds = Rds(subnet, rds_json)
                        if not outputfilter["read_replicas"] and rds.node_type == "rds_rr":
                            continue
                        subnet.addChild(rds)

                    # Get ELB's
                    for elb_json in get_elbs(subnet):
                        elb = Elb(subnet, elb_json)
                        subnet.addChild(elb)


                    # If there are leaves, then add this subnet to the final graph
                    if len(subnet.leaves) > 0:
                        node_count_per_region += len(subnet.leaves)
                        for leaf in subnet.leaves:
                            cytoscape_json.append(leaf.cytoscape_data())
                        cytoscape_json.append(subnet.cytoscape_data())
                        az.addChild(subnet)

                if az.has_leaves:
                    if outputfilter["azs"]:
                        cytoscape_json.append(az.cytoscape_data())
                    vpc.addChild(az)

            if vpc.has_leaves:
                cytoscape_json.append(vpc.cytoscape_data())
                region.addChild(vpc)

        if region.has_leaves:
            cytoscape_json.append(region.cytoscape_data())
            account.addChild(region)

        print("- {} nodes built in region {}".format(node_count_per_region, region.local_id))

    # Get VPC peerings
    for region in account.children:
        for vpc_peering in get_vpc_peerings(region):
            # For each peering, find the accepter and the requester
            accepter_id = vpc_peering["AccepterVpcInfo"]["VpcId"]
            requester_id = vpc_peering["RequesterVpcInfo"]["VpcId"]
            accepter = None
            requester = None
            for vpc in region.children:
                if accepter_id == vpc.local_id:
                    accepter = vpc
                if requester_id == vpc.local_id:
                    requester = vpc
            # If both have been found, add each as peers to one another
            if accepter and requester:
                accepter.addPeer(requester)
                requester.addPeer(accepter)

    # Get external cidr nodes
    cidrs = {}
    for cidr in get_external_cidrs(account, config):
        cidrs[cidr.arn] = cidr

    # Find connections between nodes
    # Only looking at Security Groups currently, which are a VPC level construct
    connections = {}
    for region in account.children:
        for vpc in region.children:
            for c, reasons in get_connections(cidrs, vpc, outputfilter).items():
                r = connections.get(c, [])
                r.extend(reasons)
                connections[c] = r

    # Add external cidr nodes
    used_cidrs = 0
    for _, cidr in cidrs.items():
        if cidr.is_used:
            used_cidrs += 1
            cytoscape_json.append(cidr.cytoscape_data())
    print("- {} external CIDRs built".format(used_cidrs))

    total_number_of_nodes = len(cytoscape_json)

    # Add the mapping to our graph
    for c, reasons in connections.items():
        if c.source == c.target:
            # Ensure we don't add connections with the same nodes on either side
            continue
        c._json = reasons
        cytoscape_json.append(c.cytoscape_data())
    print("- {} connections built".format(len(connections)))

    # Check if we have a lot of data, and if so, show a warning
    # Numbers chosen here are arbitrary
    MAX_NODES_FOR_WARNING = 200
    MAX_EDGES_FOR_WARNING = 500
    if total_number_of_nodes > MAX_NODES_FOR_WARNING or len(connections) > MAX_EDGES_FOR_WARNING:
        print("WARNING: There are {} total nodes and {} total edges.".format(total_number_of_nodes, len(connections)))
        print("  This will be difficult to display and may be too complex to make sense of.")
        print("  Consider reducing the number of items in the diagram by viewing a single")
        print("   region, ignoring internal edges, or other filtering.")

    return cytoscape_json
Esempio n. 2
0
def build_data_structure(account_data, config, outputfilter):
    cytoscape_json = []

    if outputfilter.get('mute', False):
        global MUTE
        MUTE = True

    account = Account(None, account_data)
    log("Building data for account {} ({})".format(account.name, account.local_id))

    cytoscape_json.append(account.cytoscape_data())
    
    # Iterate through each region and add all the VPCs, AZs, and Subnets
    for region_json in get_regions(account, outputfilter):
        nodes = {}
        region = Region(account, region_json)

        for vpc_json in get_vpcs(region, outputfilter):
            vpc = Vpc(region, vpc_json)

            for az_json in get_azs(vpc):
                # Availibility zones are not a per VPC construct, but VPC's can span AZ's,
                # so I make VPC a higher level construct
                az = Az(vpc, az_json)

                for subnet_json in get_subnets(az):
                    # If we ignore AZz, then tie the subnets up the VPC as the parent
                    if outputfilter["azs"]:
                        parent = az
                    else:
                        parent = vpc

                    subnet = Subnet(parent, subnet_json)
                    az.addChild(subnet)
                vpc.addChild(az)
            region.addChild(vpc)
        account.addChild(region)

        #
        # In each region, iterate through all the resource types
        #

        # EC2 nodes
        for ec2_json in get_ec2s(region):
            node = Ec2(region, ec2_json, outputfilter["collapse_by_tag"], outputfilter["collapse_asgs"])
            nodes[node.arn] = node
        
        # RDS nodes
        for rds_json in get_rds_instances(region):
            node = Rds(region, rds_json)
            if not outputfilter["read_replicas"] and node.node_type == "rds_rr":
                continue
            nodes[node.arn] = node

        # ELB nodes
        for elb_json in get_elbs(region):
            node = Elb(region, elb_json)
            nodes[node.arn] = node
        
        for elb_json in get_elbv2s(region):
            node = Elbv2(region, elb_json)
            nodes[node.arn] = node

        # PrivateLink and VPC Endpoints
        for vpc_endpoint_json in get_vpc_endpoints(region):
            node = VpcEndpoint(region, vpc_endpoint_json)
            nodes[node.arn] = node

        # ECS tasks
        for ecs_json in get_ecs_tasks(region):
            node = Ecs(region, ecs_json)
            nodes[node.arn] = node
        
        # Lambda functions
        for lambda_json in get_lambda_functions(region):
            node = Lambda(region, lambda_json)
            nodes[node.arn] = node

        # Redshift clusters
        for node_json in get_redshift(region):
            node = Redshift(region, node_json)
            nodes[node.arn] = node

        # ElasticSearch clusters
        for node_json in get_elasticsearch(region):
            node = ElasticSearch(region, node_json)
            nodes[node.arn] = node

        # Filter out nodes based on tags
        if len(outputfilter.get("tags", [])) > 0:
            for node_id in list(nodes):
                has_match = False
                node = nodes[node_id]
                # For each node, look to see if its tags match one of the tag sets
                # Ex. --tags Env=Prod --tags Team=Dev,Name=Bastion
                for tag_set in outputfilter.get("tags", []):
                    conditions = [c.split("=") for c in tag_set.split(",")]
                    condition_matches = 0
                    # For a tag set, see if all conditions match, ex. [["Team","Dev"],["Name","Bastion"]]
                    for pair in conditions:
                        # Given ["Team","Dev"], see if it matches one of the tags in the node
                        for tag in node.tags:
                            if tag.get('Key','') == pair[0] and tag.get('Value','') == pair[1]:
                                condition_matches += 1
                    # We have a match if all of the conditions matched
                    if condition_matches == len(conditions):
                        has_match = True
                
                # If there were no matches, remove the node
                if not has_match:
                    del nodes[node_id]

        # Add the nodes to their respective subnets
        for node_arn in list(nodes):
            node = nodes[node_arn]
            add_node_to_subnets(region, node, nodes)

        # From the root of the tree (the account), add in the children if there are leaves
        # If not, mark the item for removal
        if region.has_leaves:
            cytoscape_json.append(region.cytoscape_data())

            region_children_to_remove = set()
            for vpc in region.children:
                if vpc.has_leaves:
                    cytoscape_json.append(vpc.cytoscape_data())
                
                    vpc_children_to_remove = set()
                    for vpc_child in vpc.children:
                        if vpc_child.has_leaves:
                            if outputfilter["azs"]:
                                cytoscape_json.append(vpc_child.cytoscape_data())
                            elif vpc_child.node_type != 'az':
                                # Add VPC children that are not AZs, such as Gateway endpoints
                                cytoscape_json.append(vpc_child.cytoscape_data())
                        
                            az_children_to_remove = set()
                            for subnet in vpc_child.children:
                                if subnet.has_leaves:
                                    cytoscape_json.append(subnet.cytoscape_data())

                                    for leaf in subnet.leaves:
                                        cytoscape_json.append(leaf.cytoscape_data(subnet.arn))
                                else:
                                    az_children_to_remove.add(subnet)
                            for subnet in az_children_to_remove:
                                vpc_child.removeChild(subnet)
                        else:
                            vpc_children_to_remove.add(vpc_child)
                    for az in vpc_children_to_remove:
                        vpc.removeChild(az)
                else:
                    region_children_to_remove.add(vpc)
            for vpc in region_children_to_remove:
                region.removeChild(vpc)

        log("- {} nodes built in region {}".format(len(nodes), region.local_id))

    # Get VPC peerings
    for region in account.children:
        for vpc_peering in get_vpc_peerings(region):
            # For each peering, find the accepter and the requester
            accepter_id = vpc_peering["AccepterVpcInfo"]["VpcId"]
            requester_id = vpc_peering["RequesterVpcInfo"]["VpcId"]
            accepter = None
            requester = None
            for vpc in region.children:
                if accepter_id == vpc.local_id:
                    accepter = vpc
                if requester_id == vpc.local_id:
                    requester = vpc
            # If both have been found, add each as peers to one another
            if accepter and requester:
                accepter.addPeer(requester)
                requester.addPeer(accepter)

    # Get external cidr nodes
    cidrs = {}
    for cidr in get_external_cidrs(account, config):
        cidrs[cidr.arn] = cidr

    # Find connections between nodes
    # Only looking at Security Groups currently, which are a VPC level construct
    connections = {}
    for region in account.children:
        for vpc in region.children:
            for c, reasons in get_connections(cidrs, vpc, outputfilter).items():
                r = connections.get(c, [])
                r.extend(reasons)
                connections[c] = r

    #
    # Collapse CIDRs
    #

    # Get a list of the current CIDRs
    current_cidrs = []
    for cidr_string in cidrs:
        current_cidrs.append(cidr_string)

    # Iterate through them
    for cidr_string in current_cidrs:
        # Find CIDRs in the config that our CIDR falls inside
        # It may fall inside multiple ranges
        matching_known_cidrs = {}
        for named_cidr in config["cidrs"]:
            if IPNetwork(cidr_string) in IPNetwork(named_cidr):
                # Match found
                matching_known_cidrs[named_cidr] = IPNetwork(named_cidr).size

        if len(matching_known_cidrs) > 0:
            # A match was found. Find the smallest matching range.
            sorted_matches = sorted(matching_known_cidrs.items(), key=operator.itemgetter(1))
            # Get first item to get (CIDR,size); and first item of that to get just the CIDR
            smallest_matched_cidr_string = sorted_matches[0][0]
            smallest_matched_cidr_name = config["cidrs"][smallest_matched_cidr_string]['name']

            # Check if we have a CIDR node that doesn't match the smallest one possible.
            if cidrs[cidr_string].name != config["cidrs"][smallest_matched_cidr_string]['name']:
                # See if we need to create the larger known range
                if cidrs.get(smallest_matched_cidr_string, "") == "":
                    cidrs[smallest_matched_cidr_string] = Cidr(smallest_matched_cidr_string, smallest_matched_cidr_name)

                # The existing CIDR node needs to be removed and rebuilt as the larger known range
                del cidrs[cidr_string]

                # Get the larger known range
                new_source = cidrs[smallest_matched_cidr_string]
                new_source.is_used = True

                # Find all the connections to the old node
                connections_to_remove = []
                for c in connections:
                    if c.source.node_type == 'ip' and c.source.arn == cidr_string:
                        connections_to_remove.append(c)
                
                # Create new connections to the new node
                for c in connections_to_remove:
                    r = connections[c]
                    del connections[c]
                    connections[Connection(new_source, c._target)] = r

    # Add external cidr nodes
    used_cidrs = 0
    for _, cidr in cidrs.items():
        if cidr.is_used:
            used_cidrs += 1
            cytoscape_json.append(cidr.cytoscape_data())
    log("- {} external CIDRs built".format(used_cidrs))

    total_number_of_nodes = len(cytoscape_json)

    # Add the mapping to our graph
    for c, reasons in connections.items():
        if c.source == c.target:
            # Ensure we don't add connections with the same nodes on either side
            continue
        c._json = reasons
        cytoscape_json.append(c.cytoscape_data())
    log("- {} connections built".format(len(connections)))

    # Check if we have a lot of data, and if so, show a warning
    # Numbers chosen here are arbitrary
    MAX_NODES_FOR_WARNING = 200
    MAX_EDGES_FOR_WARNING = 500
    if total_number_of_nodes > MAX_NODES_FOR_WARNING or len(connections) > MAX_EDGES_FOR_WARNING:
        log("WARNING: There are {} total nodes and {} total edges.".format(total_number_of_nodes, len(connections)))
        log("  This will be difficult to display and may be too complex to make sense of.")
        log("  Consider reducing the number of items in the diagram by viewing a single")
        log("   region, ignoring internal edges, or other filtering.")

    return cytoscape_json