def get_resource_nodes(region, outputfilter):
    """Collect the resource nodes found in ``region``, keyed by ARN.

    Args:
        region: Region object handed to every ``get_*`` fetcher and to each
            node constructor.
        outputfilter: Mapping of output options. The keys read here are
            ``collapse_by_tag`` and ``collapse_asgs`` (forwarded to ``Ec2``)
            and ``read_replicas``; each defaults to ``False`` when absent.

    Returns:
        dict mapping ``node.arn`` to the node object. If two resources
        report the same ARN, the one built later replaces the earlier one.
    """
    nodes = {}

    # EC2 nodes: the only type whose constructor takes the collapse options.
    for ec2_json in get_ec2s(region):
        node = Ec2(
            region,
            ec2_json,
            outputfilter.get("collapse_by_tag", False),
            outputfilter.get("collapse_asgs", False),
        )
        nodes[node.arn] = node

    # RDS nodes: read replicas are dropped unless explicitly requested.
    include_read_replicas = outputfilter.get("read_replicas", False)
    for rds_json in get_rds_instances(region):
        node = Rds(region, rds_json)
        if not include_read_replicas and node.node_type == "rds_rr":
            continue
        nodes[node.arn] = node

    # Every remaining resource type follows the same "fetch, wrap, store"
    # pattern, so drive them from a table. The order matches the original
    # sequence of loops so that dict insertion order is unchanged.
    simple_resource_types = (
        (get_elbs, Elb),                    # Classic ELBs
        (get_elbv2s, Elbv2),                # ELBv2 load balancers
        (get_vpc_endpoints, VpcEndpoint),   # PrivateLink and VPC Endpoints
        (get_ecs_tasks, Ecs),               # ECS tasks
        (get_lambda_functions, Lambda),     # Lambda functions
        (get_redshift, Redshift),           # Redshift clusters
        (get_elasticsearch, ElasticSearch), # ElasticSearch clusters
    )
    for fetch, node_class in simple_resource_types:
        for resource_json in fetch(region):
            node = node_class(region, resource_json)
            nodes[node.arn] = node

    return nodes
def build_data_structure(account_data, config, outputfilter):
    # Build the account -> region -> VPC -> AZ -> subnet hierarchy, collect the
    # resource nodes of every region, work out the connections between nodes
    # and external CIDRs, and return the list `cytoscape_json`.
    #
    # Parameters (as used by the visible code):
    #   account_data: passed straight to Account(None, account_data).
    #   config:       passed to get_external_cidrs(); config["cidrs"] is
    #                 iterated for CIDR strings and indexed as
    #                 config["cidrs"][cidr]["name"].
    #   outputfilter: mapping of options. Keys read here: 'mute' and "tags"
    #                 (via .get, optional) and "azs", "collapse_by_tag",
    #                 "collapse_asgs", "read_replicas" (via [], so a missing
    #                 key raises KeyError). Also forwarded to get_regions(),
    #                 get_vpcs() and get_connections().
    #
    # Returns: the `cytoscape_json` list.
    #
    # Side effect: sets the module-level MUTE flag to True when
    # outputfilter['mute'] is truthy.
    #
    # NOTE(review): several statements in this function have no body (each is
    # marked with a NOTE(review) comment below), so the function does not parse
    # as written. It looks like lines were lost from this file; restore them
    # from version control before relying on the comments in this block.
    cytoscape_json = []

    if outputfilter.get('mute', False):
        global MUTE
        MUTE = True

    account = Account(None, account_data)
    log("Building data for account {} ({})".format(account.name, account.local_id))

    # Iterate through each region and add all the VPCs, AZs, and Subnets
    for region_json in get_regions(account, outputfilter):
        nodes = {}
        region = Region(account, region_json)

        for vpc_json in get_vpcs(region, outputfilter):
            vpc = Vpc(region, vpc_json)

            for az_json in get_azs(vpc):
                # Availability zones are not a per VPC construct, but VPCs can span AZs,
                # so I make VPC a higher level construct
                az = Az(vpc, az_json)

                for subnet_json in get_subnets(az):
                    # If we ignore AZs, then tie the subnets up the VPC as the parent
                    if outputfilter["azs"]:
                        parent = az
                        # NOTE(review): the next assignment unconditionally overwrites
                        # `parent`, so it is always `vpc` as written. Presumably an
                        # `else:` line is missing here -- confirm.
                        parent = vpc

                    # NOTE(review): `subnet` is not used again in the visible code;
                    # presumably it is attached to its parent by the constructor or
                    # by a line that is missing here -- confirm.
                    subnet = Subnet(parent, subnet_json)

        # In each region, iterate through all the resource types
        # NOTE(review): the loops below repeat the logic of get_resource_nodes(),
        # except that they index outputfilter with [] rather than .get() with a
        # default, so a missing key raises KeyError here.

        # EC2 nodes
        for ec2_json in get_ec2s(region):
            node = Ec2(region, ec2_json, outputfilter["collapse_by_tag"], outputfilter["collapse_asgs"])
            nodes[node.arn] = node
        # RDS nodes
        for rds_json in get_rds_instances(region):
            node = Rds(region, rds_json)
            if not outputfilter["read_replicas"] and node.node_type == "rds_rr":
            # NOTE(review): the `if` above has no body. Presumably a `continue`
            # (skip read replicas unless requested) is missing -- confirm.
            nodes[node.arn] = node

        # ELB nodes
        for elb_json in get_elbs(region):
            node = Elb(region, elb_json)
            nodes[node.arn] = node
        for elb_json in get_elbv2s(region):
            node = Elbv2(region, elb_json)
            nodes[node.arn] = node

        # PrivateLink and VPC Endpoints
        for vpc_endpoint_json in get_vpc_endpoints(region):
            node = VpcEndpoint(region, vpc_endpoint_json)
            nodes[node.arn] = node

        # ECS tasks
        for ecs_json in get_ecs_tasks(region):
            node = Ecs(region, ecs_json)
            nodes[node.arn] = node
        # Lambda functions
        for lambda_json in get_lambda_functions(region):
            node = Lambda(region, lambda_json)
            nodes[node.arn] = node

        # Redshift clusters
        for node_json in get_redshift(region):
            node = Redshift(region, node_json)
            nodes[node.arn] = node

        # ElasticSearch clusters
        for node_json in get_elasticsearch(region):
            node = ElasticSearch(region, node_json)
            nodes[node.arn] = node

        # Filter out nodes based on tags
        if len(outputfilter.get("tags", [])) > 0:
            # Iterate over a snapshot of the keys because entries are deleted
            # from `nodes` inside the loop.
            for node_id in list(nodes):
                has_match = False
                node = nodes[node_id]
                # For each node, look to see if its tags match one of the tag sets
                # Ex. --tags Env=Prod --tags Team=Dev,Name=Bastion
                for tag_set in outputfilter.get("tags", []):
                    # A condition without "=" splits into a one-element list, so
                    # pair[1] below raises IndexError for such input.
                    conditions = [c.split("=") for c in tag_set.split(",")]
                    condition_matches = 0
                    # For a tag set, see if all conditions match, ex. [["Team","Dev"],["Name","Bastion"]]
                    for pair in conditions:
                        # Given ["Team","Dev"], see if it matches one of the tags in the node
                        for tag in node.tags:
                            if tag.get('Key','') == pair[0] and tag.get('Value','') == pair[1]:
                                condition_matches += 1
                    # We have a match if all of the conditions matched
                    # (the counter is incremented once per matching condition/tag pair)
                    if condition_matches == len(conditions):
                        has_match = True
                # If there were no matches, remove the node
                if not has_match:
                    del nodes[node_id]

        # Add the nodes to their respective subnets
        for node_arn in list(nodes):
            node = nodes[node_arn]
            add_node_to_subnets(region, node, nodes)

        # From the root of the tree (the account), add in the children if there are leaves
        # If not, mark the item for removal
        # NOTE(review): most statement bodies in this section are missing (the
        # if/elif branches and every `for` over a *_children_to_remove set are
        # empty), and nothing visible adds to those sets or to cytoscape_json.
        # Restore the original lines before changing anything here.
        if region.has_leaves:

            region_children_to_remove = set()
            for vpc in region.children:
                if vpc.has_leaves:
                    vpc_children_to_remove = set()
                    for vpc_child in vpc.children:
                        if vpc_child.has_leaves:
                            if outputfilter["azs"]:
                            # NOTE(review): body of the `if` above is missing.
                            elif vpc_child.node_type != 'az':
                                # Add VPC children that are not AZs, such as Gateway endpoints
                                # NOTE(review): body of this `elif` is missing.
                            az_children_to_remove = set()
                            for subnet in vpc_child.children:
                                if subnet.has_leaves:

                                    for leaf in subnet.leaves:
                                    # NOTE(review): body of the `for leaf` loop is missing.
                            for subnet in az_children_to_remove:
                            # NOTE(review): loop body missing.
                    for az in vpc_children_to_remove:
                    # NOTE(review): loop body missing.
            for vpc in region_children_to_remove:
            # NOTE(review): loop body missing.

        log("- {} nodes built in region {}".format(len(nodes), region.local_id))

    # Get VPC peerings
    for region in account.children:
        for vpc_peering in get_vpc_peerings(region):
            # For each peering, find the accepter and the requester
            accepter_id = vpc_peering["AccepterVpcInfo"]["VpcId"]
            requester_id = vpc_peering["RequesterVpcInfo"]["VpcId"]
            accepter = None
            requester = None
            # Only VPCs within this same region are searched.
            for vpc in region.children:
                if accepter_id == vpc.local_id:
                    accepter = vpc
                if requester_id == vpc.local_id:
                    requester = vpc
            # If both have been found, add each as peers to one another
            if accepter and requester:
            # NOTE(review): body of the `if` above is missing.

    # Get external cidr nodes
    # `cidrs` maps cidr.arn -> CIDR node; the code below also looks entries up
    # by CIDR string, so the arn is presumably the CIDR string itself -- confirm.
    cidrs = {}
    for cidr in get_external_cidrs(account, config):
        cidrs[cidr.arn] = cidr

    # Find connections between nodes
    # Only looking at Security Groups currently, which are a VPC level construct
    connections = {}
    for region in account.children:
        for vpc in region.children:
            for c, reasons in get_connections(cidrs, vpc, outputfilter).items():
                r = connections.get(c, [])
                # NOTE(review): `reasons` is never merged into `r` in the visible
                # code, so every connection is stored with whatever list it already
                # had (or an empty one). A line appears to be missing -- confirm.
                connections[c] = r

    # Collapse CIDRs

    # Get a list of the current CIDRs
    # (a separate list is needed because `cidrs` is modified in the loop below)
    current_cidrs = []
    for cidr_string in cidrs:
    # NOTE(review): loop body missing; as written `current_cidrs` stays empty.

    # Iterate through them
    for cidr_string in current_cidrs:
        # Find CIDRs in the config that our CIDR falls inside
        # It may fall inside multiple ranges
        # matching_known_cidrs maps each containing config CIDR to its
        # IPNetwork(...).size, which is the sort key used below.
        matching_known_cidrs = {}
        for named_cidr in config["cidrs"]:
            if IPNetwork(cidr_string) in IPNetwork(named_cidr):
                # Match found
                matching_known_cidrs[named_cidr] = IPNetwork(named_cidr).size

        if len(matching_known_cidrs) > 0:
            # A match was found. Find the smallest matching range.
            sorted_matches = sorted(matching_known_cidrs.items(), key=operator.itemgetter(1))
            # Get first item to get (CIDR,size); and first item of that to get just the CIDR
            smallest_matched_cidr_string = sorted_matches[0][0]
            smallest_matched_cidr_name = config["cidrs"][smallest_matched_cidr_string]['name']

            # Check if we have a CIDR node that doesn't match the smallest one possible.
            if cidrs[cidr_string].name != config["cidrs"][smallest_matched_cidr_string]['name']:
                # See if we need to create the larger known range
                if cidrs.get(smallest_matched_cidr_string, "") == "":
                    cidrs[smallest_matched_cidr_string] = Cidr(smallest_matched_cidr_string, smallest_matched_cidr_name)

                # The existing CIDR node needs to be removed and rebuilt as the larger known range
                del cidrs[cidr_string]

                # Get the larger known range
                new_source = cidrs[smallest_matched_cidr_string]
                new_source.is_used = True

                # Find all the connections to the old node
                connections_to_remove = []
                for c in connections:
                    if c.source.node_type == 'ip' and c.source.arn == cidr_string:
                    # NOTE(review): body of the `if` above is missing; as written
                    # `connections_to_remove` stays empty.
                # Create new connections to the new node
                for c in connections_to_remove:
                    r = connections[c]
                    del connections[c]
                    # NOTE(review): this reads `c._target` while the mapping loop
                    # further down reads `c.target` -- confirm both exist.
                    connections[Connection(new_source, c._target)] = r

    # Add external cidr nodes
    # Only the CIDR nodes flagged is_used are counted for the log line below.
    used_cidrs = 0
    for _, cidr in cidrs.items():
        if cidr.is_used:
            used_cidrs += 1
    log("- {} external CIDRs built".format(used_cidrs))

    # NOTE(review): no visible line ever appends to cytoscape_json, so this is
    # always 0 as written.
    total_number_of_nodes = len(cytoscape_json)

    # Add the mapping to our graph
    for c, reasons in connections.items():
        if c.source == c.target:
            # Ensure we don't add connections with the same nodes on either side
            # NOTE(review): body of this `if` is missing.
        c._json = reasons
    log("- {} connections built".format(len(connections)))

    # Check if we have a lot of data, and if so, show a warning
    # Numbers chosen here are arbitrary
    # (MAX_NODES_FOR_WARNING / MAX_EDGES_FOR_WARNING are defined outside this block)
    if total_number_of_nodes > MAX_NODES_FOR_WARNING or len(connections) > MAX_EDGES_FOR_WARNING:
        log("WARNING: There are {} total nodes and {} total edges.".format(total_number_of_nodes, len(connections)))
        log("  This will be difficult to display and may be too complex to make sense of.")
        log("  Consider reducing the number of items in the diagram by viewing a single")
        log("   region, ignoring internal edges, or other filtering.")

    return cytoscape_json