@classmethod
def collect(cls, api_client: client.ApiClient, graph: Graph):
    for response in cls.list(api_client):
        kwargs, search_results = default_attributes(
            response, cls.attr_map, cls.search_map, graph
        )
        parent = graph.root
        namespace = response.metadata.namespace
        resource = cls(**kwargs)
        if namespace:
            ns = graph.search_first_all(
                {"kind": "kubernetes_namespace", "name": namespace}
            )
            if ns:
                parent = ns
        log.debug(f"Collected {resource.rtdname} in {parent.rtdname}")
        graph.add_resource(parent, resource)

        parent_map = {True: cls.predecessor_names, False: cls.successor_names}
        for is_parent, sr_names in parent_map.items():
            for sr_name in sr_names:
                if sr_name in search_results:
                    srs = search_results[sr_name]
                    for sr in srs:
                        if is_parent:
                            src = sr
                            dst = resource
                        else:
                            src = resource
                            dst = sr
                        graph.add_edge(src, dst)
                elif sr_name in cls.search_map:
                    graph_search = cls.search_map[sr_name]
                    attr = graph_search[0]
                    value_name = graph_search[1]
                    value = get_response_data(response, value_name)
                    if value:
                        if isinstance(value, list):
                            values = value
                            for value in values:
                                resource.add_deferred_connection(
                                    {attr: value}, is_parent
                                )
                        elif isinstance(value, str):
                            resource.add_deferred_connection({attr: value}, is_parent)
                        else:
                            log.error(
                                (
                                    "Unable to add deferred connection for"
                                    f" value {value} of type {type(value)}"
                                )
                            )
                else:
                    log.error(f"Key {sr_name} is missing in search_map")

        # post_process runs per collected resource, if the class defines one
        post_process = getattr(cls, "post_process", None)
        if callable(post_process):
            post_process(resource, graph)
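
# A minimal sketch of the contract `collect` relies on: a resource class
# declares `attr_map` (kwarg -> response attribute), `search_map`
# (name -> [graph attribute, response attribute]) and predecessor/successor
# name lists. The class below is illustrative only -- the KubernetesResource
# base name and the dotted attribute paths are assumptions, not part of the
# plugin code shown above.
class KubernetesPodExample(KubernetesResource):
    kind = "kubernetes_pod"
    attr_map = {"pod_status": "status.phase"}
    # link each pod to the node it runs on, matched by node name
    search_map = {"__node": ["name", "spec.node_name"]}
    predecessor_names = ["__node"]
    successor_names = []
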
def test_baseresource_chksum():
    g = Graph()
    a = SomeTestResource("a", {})
    with pytest.raises(RuntimeError):
        a.chksum
    g.add_node(a)
    assert isinstance(a.chksum, str)
def test_graph_export_iterator():
    g = Graph(root=GraphRoot("root", {}))
    a = SomeTestResource("a", {})
    g.add_resource(g.root, a)
    assert getrefcount(g) == 2
    gei = GraphExportIterator(g)
    assert getrefcount(g) == 3
    gei.export_graph()
    assert getrefcount(g) == 2
    assert len(list(gei)) == 3
def test_graph_container():
    gc = GraphContainer(cache_graph=False)
    g = Graph()
    n1 = SomeTestResource("foo", {})
    n2 = SomeTestResource("bar", {})
    g.add_node(n1)
    gc.graph.add_resource(gc.GRAPH_ROOT, n2)
    gc.add(g)
    gc.graph.add_edge(n1, n2)
    assert len(gc.graph.nodes) == 3
    assert len(gc.graph.edges) == 2
    assert gc.graph.search_first("id", "bar") == n2
    assert gc.graph.search_first_parent_class(n2, SomeTestResource) == n1
def graph(self, query: str) -> Graph:
    def process_data_line(data: dict, graph: Graph):
        """Process a single line of resotocore graph data"""
        if data.get("type") == "node":
            node_id = data.get("id")
            node = node_from_dict(data)
            node_mapping[node_id] = node
            log.debug(f"Adding node {node} to the graph")
            graph.add_node(node)
            if node.kind == "graph_root":
                log.debug(f"Setting graph root {node}")
                graph.root = node
        elif data.get("type") == "edge":
            node_from = data.get("from")
            node_to = data.get("to")
            edge_type = EdgeType.from_value(data.get("edge_type"))
            if node_from not in node_mapping or node_to not in node_mapping:
                raise ValueError(f"One of {node_from} -> {node_to} unknown")
            graph.add_edge(
                node_mapping[node_from], node_mapping[node_to], edge_type=edge_type
            )

    graph = Graph()
    node_mapping = {}
    for data in self.query(query):
        try:
            process_data_line(data, graph)
        except ValueError as e:
            log.error(e)
            continue
    sanitize(graph)
    return graph
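
# Usage sketch for graph() above, assuming `core` is an instance of the
# surrounding resotocore client class (the search string is illustrative):
#
#   g = core.graph("is(graph_root) -->")
#   print(len(g.nodes), len(g.edges))
#
# Each item yielded by self.query() is a dict shaped roughly like
#   {"type": "node", "id": "n1", ...}  or
#   {"type": "edge", "from": "n1", "to": "n2", "edge_type": "default"},
# which is exactly what process_data_line() consumes.
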
def update_model(
    graph: Graph,
    resotocore_base_uri: str,
    dump_json: bool = False,
    tempdir: Optional[str] = None,
) -> None:
    model_uri = f"{resotocore_base_uri}/model"
    log.debug(f"Updating model via {model_uri}")
    model_json = json.dumps(graph.export_model(), indent=4)

    if dump_json:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        with tempfile.NamedTemporaryFile(
            prefix=f"resoto-model-{ts}-",
            suffix=".json",
            delete=not dump_json,
            dir=tempdir,
        ) as model_outfile:
            log.info(f"Writing model json to file {model_outfile.name}")
            model_outfile.write(model_json.encode())

    headers = {
        "Content-Type": "application/json",
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)
    r = requests.patch(model_uri, data=model_json, headers=headers)
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to create model: {r.content}")
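
# Usage sketch, assuming a collected `graph` and a reachable resotocore
# (the URI and tempdir are assumptions):
#
#   update_model(graph, "http://localhost:8900", dump_json=True, tempdir="/tmp")
#
# Note that NamedTemporaryFile is opened with delete=not dump_json, which is
# always False inside the `if dump_json:` branch, so the dumped model file
# is kept on disk for inspection.
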
def test_resotocore():
    recorded_headers: Dict[str, str] = {}

    def make_query(request: requests.Request) -> requests.Response:
        nonlocal recorded_headers
        recorded_headers = request.headers
        resp = requests.Response()
        resp.status_code = 200
        resp._content = str.encode(json.dumps("OK"))
        return resp

    config = cast(
        Config,
        FakeConfig(
            values={
                "resotoworker": {
                    "graph": "resoto",
                    "debug_dump_json": False,
                    "tempdir": "/tmp",
                    "graph_merge_kind": "foo_kind",
                },
                "running_config": None,
            }
        ),
    )

    core = Resotocore(make_query, config)
    core.send_to_resotocore(Graph(), "task_123")
    print(recorded_headers)
    assert recorded_headers["Resoto-Worker-Task-Id"] == "task_123"
def prepare_graph(do_client) -> Graph:
    cloud = Cloud("do")
    team = DigitalOceanTeam(id="test_team", urn="do:team:test_team")
    plugin_instance = DigitalOceanTeamCollector(team, do_client)
    plugin_instance.collect()

    cloud_graph = Graph(root=cloud)
    graph = Graph(root=GraphRoot("root", {}))
    cloud_graph.merge(plugin_instance.graph)
    graph.merge(cloud_graph)
    sanitize(graph)
    return graph
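
# The merge order above produces the hierarchy
#   graph_root -> cloud "do" -> team "test_team" -> <collected team resources>
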
def collect(collectors: List[BaseCollectorPlugin]) -> Optional[Graph]:
    # note: `self._config` is captured from the enclosing scope; this
    # function is written as a closure inside a collector method
    graph = Graph(root=GraphRoot("root", {}))

    max_workers = (
        len(collectors)
        if len(collectors) < self._config.resotoworker.pool_size
        else self._config.resotoworker.pool_size
    )
    if max_workers == 0:
        log.error(
            "No workers configured or no collector plugins loaded - skipping collect"
        )
        return None

    pool_args = {"max_workers": max_workers}
    if self._config.resotoworker.fork_process:
        pool_args["mp_context"] = multiprocessing.get_context("spawn")
        pool_args["initializer"] = resotolib.proc.initializer
        pool_executor = futures.ProcessPoolExecutor
        collect_args = {
            "args": ArgumentParser.args,
            "running_config": self._config.running_config,
        }
    else:
        pool_executor = futures.ThreadPoolExecutor
        collect_args = {}

    with pool_executor(**pool_args) as executor:
        wait_for = [
            executor.submit(
                collect_plugin_graph,
                collector,
                **collect_args,
            )
            for collector in collectors
        ]
        for future in futures.as_completed(wait_for):
            cluster_graph = future.result()
            if not isinstance(cluster_graph, Graph):
                log.error(f"Skipping invalid cluster_graph {type(cluster_graph)}")
                continue
            graph.merge(cluster_graph)
    sanitize(graph)
    return graph
def __init__(
    self, cluster: KubernetesCluster, cluster_config: client.Configuration
) -> None:
    """
    Args:
        cluster: The K8S cluster resource object this cluster collector
            is going to collect.
        cluster_config: The Kubernetes client configuration used to talk
            to the cluster's API server.
    """
    self.cluster = cluster
    self.config = cluster_config
    self.api_client = client.ApiClient(self.config)
    self.graph = Graph(root=self.cluster)
def update_users_groups_channels(self, graph: Graph):
    log.debug("Updating Users Groups and Channels")
    tmp_users = {}
    tmp_emails = {}
    tmp_usergroups = {}
    tmp_channels = {}
    for user in graph.search("kind", "slack_user"):
        tmp_users[user.name] = user
        if user.email:
            tmp_emails[user.email] = user
    for usergroup in graph.search("kind", "slack_usergroup"):
        if usergroup.is_usergroup:
            tmp_usergroups[usergroup.name] = usergroup
    for channel in graph.search("kind", "slack_conversation"):
        if channel.is_channel:
            tmp_channels[channel.name] = channel
    self.users2id = tmp_users
    self.emails2id = tmp_emails
    self.usergroups2id = tmp_usergroups
    self.channels2id = tmp_channels
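
# Usage sketch: once update_users_groups_channels(graph) has run, the lookup
# tables resolve Slack names and emails to graph resources, e.g. (names are
# illustrative):
#
#   user = self.users2id.get("jdoe")
#   owner = self.emails2id.get("jdoe@example.com")
#   channel = self.channels2id.get("general")
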
def test_graph():
    g = Graph()
    n1 = SomeTestResource("foo", {})
    n2 = SomeTestResource("bar", {})
    g.add_node(n1)
    g.add_node(n2)
    g.add_edge(n1, n2)
    assert len(g.nodes) == 2
    assert len(g.edges) == 1
def default_attributes(
    response, attr_map: Dict, search_map: Dict, graph: Graph
) -> Tuple[Dict, Dict]:
    kwargs = {
        "id": response.metadata.uid,
        "name": response.metadata.name,
        "ctime": response.metadata.creation_timestamp,
        "self_link": response.metadata.self_link,
        "tags": response.metadata.labels if response.metadata.labels else {},
        "_api_response": pformat(response),
    }
    search_results = {}
    for map_to, map_from in attr_map.items():
        data = get_response_data(response, map_from)
        if data is None:
            log.debug(f"Unable to set {map_to}, attribute {map_from} not in result")
            continue
        kwargs[map_to] = data

    for map_to, search_data in search_map.items():
        search_attr = search_data[0]
        search_value_name = search_data[1]
        search_value = get_response_data(response, search_value_name)
        if search_value is None:
            continue
        if isinstance(search_value, list):
            search_values = search_value
        else:
            search_values = [search_value]
        for search_value in search_values:
            search_result = graph.search_first(search_attr, search_value)
            if search_result:
                if map_to not in search_results:
                    search_results[map_to] = []
                search_results[map_to].append(search_result)
        if (
            map_to not in kwargs
            and map_to in search_results
            and not str(map_to).startswith("_")
        ):
            search_result = search_results[map_to]
            if len(search_result) == 1:
                kwargs[map_to] = search_result[0]
            else:
                kwargs[map_to] = list(search_result)

    return kwargs, search_results
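
# Hedged sketch of a search_map entry as consumed above (the attribute paths
# are illustrative): resolve a pod's node by matching graph nodes whose
# "name" attribute equals the response's spec.node_name.
#
#   search_map = {"__node": ["name", "spec.node_name"]}
#   kwargs, search_results = default_attributes(response, {}, search_map, graph)
#
# Matches land in search_results["__node"]; because the key starts with an
# underscore it is deliberately not copied into kwargs.
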
def alarm_cleanup(self, graph: Graph):
    log.info("AWS Cloudwatch Alarms cleanup called")
    for node in graph.nodes:
        if node.protected or not isinstance(node, AWSCloudwatchAlarm):
            continue
        cloud = node.cloud(graph)
        account = node.account(graph)
        region = node.region(graph)
        log_prefix = (
            f"Found {node.rtdname} in cloud {cloud.name} account {account.dname} "
            f"region {region.name}."
        )
        if len(self.config) > 0:
            if (
                cloud.id not in self.config
                or account.id not in self.config[cloud.id]
            ):
                log.debug(f"{log_prefix} Account not found in config - ignoring.")
                continue

        should_clean = False
        i = None
        log_msg = log_prefix
        for dimension in node.dimensions:
            if dimension.get("Name") == "InstanceId":
                instance_id = dimension.get("Value")
                i = graph.search_first_all(
                    {"kind": "aws_ec2_instance", "id": instance_id}
                )
                # "terminated" must be a 1-tuple here; without the trailing
                # comma, `not in` would do a substring test on the string
                if isinstance(i, AWSEC2Instance) and i.instance_status not in (
                    "terminated",
                ):
                    should_clean = False
                    break
                else:
                    should_clean = True
                    log_msg += f" Referenced EC2 instance {instance_id} not found."
        if not should_clean:
            continue
        log.debug(f"{log_msg} - cleaning alarm")
        node.clean = True
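
# Hedged sketch of the cleanup config shape implied above: a mapping of
# cloud id to the account ids whose alarms may be cleaned (values are
# illustrative):
#
#   config = {"aws": ["123456789012"]}
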
def test_multidigraph():
    g = Graph()
    a = SomeTestResource("a", {})
    b = SomeTestResource("b", {})
    c = SomeTestResource("c", {})
    d = SomeTestResource("d", {})
    g.add_resource(a, b)
    g.add_resource(b, c)
    g.add_resource(c, d)
    g.add_edge(b, a, edge_type=EdgeType.delete)
    g.add_edge(b, d, edge_type=EdgeType.delete)
    assert len(g.nodes) == 4
    assert len(g.edges) == 5
    assert len(list(g.successors(a))) == 1
    # adding the same default edge again must not create a duplicate
    g.add_edge(a, b)
    assert len(list(g.successors(a))) == 1
    assert len(list(g.predecessors(b))) == 1
    assert len(list(g.predecessors(a))) == 0
    assert len(list(g.successors(b))) == 1
    assert len(list(g.successors(c))) == 1
    assert len(list(g.successors(d))) == 0
    assert len(list(g.predecessors(a, edge_type=EdgeType.delete))) == 1
    assert len(list(g.successors(b, edge_type=EdgeType.delete))) == 2
    assert len(list(g.successors(a, edge_type=EdgeType.delete))) == 0
    assert len(list(g.predecessors(b, edge_type=EdgeType.delete))) == 0
    assert len(list(g.ancestors(a))) == 0
    assert len(list(g.descendants(a))) == 3
    assert len(list(g.descendants(a, edge_type=EdgeType.delete))) == 0
    assert len(list(g.descendants(b))) == 2
    assert len(list(g.descendants(b, edge_type=EdgeType.delete))) == 2
    assert g.is_dag_per_edge_type()
    # a default-edge cycle breaks the per-edge-type DAG property
    g.add_edge(b, a)
    assert g.is_dag_per_edge_type() is False
def test_graph_merge():
    rg1 = Graph()
    rg2 = Graph()
    a = SomeTestResource("a", {})
    b = SomeTestResource("b", {})
    c = SomeTestResource("c", {})
    d = SomeTestResource("d", {})
    rg1.add_node(a)
    rg1.add_node(b)
    rg2.add_node(c)
    rg2.add_node(d)
    rg1.add_edge(a, b, edge_type=EdgeType.delete)
    rg2.add_edge(c, d, edge_type=EdgeType.delete)
    rg1.merge(rg2)
    assert len(rg1.nodes) == 4
    assert len(rg1.edges) == 2
    # edges are (src, dst, key) triples whose key is (src, dst, edge_type)
    for edge in rg1.edges:
        assert len(edge) == 3
        key = edge[2]
        assert len(key) == 3
        edge_type = key[2]
        assert edge_type == EdgeType.delete
class DigitalOceanTeamCollector:
    """Collects a single DigitalOcean team

    Responsible for collecting all the resources of an individual team.
    Builds up its own local graph which is then taken by collect_project()
    and merged with the plugin graph.

    This way we can have many instances of DigitalOceanCollectorPlugin
    running in parallel, all building up individual graphs which in the end
    are merged into a final graph containing all DigitalOcean resources.
    """

    def __init__(self, team: DigitalOceanTeam, client: StreamingWrapper) -> None:
        self.client = client
        self.team = team

        # Mandatory collectors are always collected regardless of whether
        # they were included by --do-collect or excluded by --do-no-collect
        self.mandatory_collectors: List[Tuple[str, Callable[..., None]]] = [
            ("regions", self.collect_regions)
        ]
        # Global collectors are resources that are either specified on a
        # global level as opposed to a per zone or per region level, or they
        # are zone/region resources that provide an aggregatedList() function
        # returning all resources for all zones/regions.
        self.global_collectors: List[Tuple[str, Callable[..., None]]] = [
            ("tags", self.collect_tags),
            ("vpcs", self.collect_vpcs),
            ("instances", self.collect_instances),
            ("volumes", self.collect_volumes),
            ("databases", self.collect_databases),
            ("k8s_clusters", self.collect_k8s_clusters),
            ("snapshots", self.collect_snapshots),
            ("load_balancers", self.collect_load_balancers),
            ("floating_ips", self.collect_floating_ips),
            ("project", self.collect_projects),
            ("apps", self.collect_apps),
            ("cdn_endpoints", self.collect_cdn_endpoints),
            ("certificates", self.collect_certificates),
            ("container_registry", self.collect_container_registry),
            ("ssh_keys", self.collect_ssh_keys),
            ("domains", self.collect_domains),
            ("firewalls", self.collect_firewalls),
            ("alert_policies", self.collect_alert_policies),
        ]
        self.region_collectors: List[Tuple[str, Callable[..., None]]] = [
            ("spaces", self.collect_spaces),
        ]
        self.all_collectors = dict(self.mandatory_collectors)
        self.all_collectors.update(self.region_collectors)
        self.all_collectors.update(self.global_collectors)
        self.collector_set = set(self.all_collectors.keys())

    def collect(self) -> None:
        """Runs the actual resource collection across all resource collectors.

        Resource collectors add their resources to the local `self.graph` graph.
        """
        log.info("Collecting DigitalOcean resources for team %s", self.team.id)
        self.graph = Graph(root=self.team)
        collectors = set(self.collector_set)

        log.debug(
            (
                f"Running the following collectors in {self.team.rtdname}:"
                f" {', '.join(collectors)}"
            )
        )
        for collector_name, collector in self.mandatory_collectors:
            if collector_name in collectors:
                log.info(f"Collecting {collector_name} in {self.team.rtdname}")
                collector()
        regions = [r for r in self.graph.nodes if isinstance(r, DigitalOceanRegion)]
        for region in regions:
            for collector_name, collector in self.region_collectors:
                if collector_name in collectors:
                    log.info(
                        (
                            f"Collecting {collector_name} in {region.rtdname}"
                            f" {self.team.rtdname}"
                        )
                    )
                    collector(region=region)
        for collector_name, collector in self.global_collectors:
            if collector_name in collectors:
                log.info(f"Collecting {collector_name} in {self.team.rtdname}")
                collector()

        remove_nodes = set()

        def rmnodes(cls: Any) -> None:
            for node in self.graph.nodes:
                if isinstance(node, cls) and not any(
                    True for _ in self.graph.successors(node)
                ):
                    remove_nodes.add(node)
            for node in remove_nodes:
                self.graph.remove_node(node)
            log.debug(f"Removing {len(remove_nodes)} unreferenced nodes of type {cls}")
            remove_nodes.clear()

        # since the regions API will return all available regions, we need to
        # remove the regions that are not used by any resources
        rmnodes(DigitalOceanRegion)

    def default_attributes(
        self,
        result: Dict[str, Any],
        attr_map: Dict[str, Any],
        search_map: Dict[str, Any],
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """See a similar method in the GCPCollectorPlugin"""

        # The following are default attributes that are passed to every
        # BaseResource() if found in `result`
        def extract_tags(result: Dict[str, Any]) -> Dict[str, Optional[str]]:
            raw_tags = result.get("tags", [])
            raw_tags = raw_tags if raw_tags else []
            tags = [parse_tag(tag) for tag in raw_tags if tag]
            return dict(tags) if tags else {}

        kwargs = {
            "id": str(result.get("id")),
            "tags": extract_tags(result),
            "name": result.get("name"),
            "ctime": iso2datetime(result.get("created_at")),
            "mtime": iso2datetime(result.get("updated_at")),
            "_account": self.team,
        }

        if attr_map is not None:
            for map_to, attribute_selector in attr_map.items():
                data = get_result_data(result, attribute_selector)
                if data is None:
                    log.debug(f"Attribute {attribute_selector} not in result")
                    continue
                log.debug(f"Found attribute {map_to}: {pformat(data)}")
                kwargs[map_to] = data

        # By default we search for a resource's region and/or zone
        default_search_map: Dict[str, Any] = {}
        search_results: Dict[str, Any] = {}
        if search_map is None:
            search_map = dict(default_search_map)
        else:
            updated_search_map = dict(default_search_map)
            updated_search_map.update(search_map)
            search_map = updated_search_map

        for map_to, search_data in search_map.items():
            search_attr = search_data[0]
            search_value_name = search_data[1]
            search_value = get_result_data(result, search_value_name)
            if search_value is None:
                continue
            if isinstance(search_value, List):
                search_values = search_value
            else:
                search_values = [search_value]
            for search_value in search_values:
                search_result = self.graph.search_first(search_attr, search_value)
                if search_result:
                    if map_to not in search_results:
                        search_results[map_to] = []
                    search_results[map_to].append(search_result)
            if (
                map_to not in kwargs
                and map_to in search_results
                and not str(map_to).startswith("__")
            ):
                search_result = search_results[map_to]
                if len(search_result) == 1:
                    kwargs[map_to] = search_result[0]
                else:
                    kwargs[map_to] = list(search_result)

        # If the resource was referencing a zone but not a region we look up
        # its region based on the zone information we found.
        # E.g. if we know a disk is in zone us-central1-a then we can find
        # the region us-central1 from that.
        if (
            "_zone" in kwargs
            and "_region" not in kwargs
            and isinstance(kwargs["_zone"], BaseResource)
        ):
            region = kwargs["_zone"].region(self.graph)
            if region:
                kwargs["_region"] = region
                if "_region" in search_map.keys() and "_region" not in search_results:
                    search_results["_region"] = region

        return kwargs, search_results
    def collect_resource(
        self,
        resources: List[Json],
        resource_class: Type[BaseResource],
        attr_map: Dict[str, Any],
        search_map: Optional[Dict[str, Any]] = None,
        successors: Optional[Dict[EdgeType, List[str]]] = None,
        predecessors: Optional[Dict[EdgeType, List[str]]] = None,
        dump_resource: bool = False,
    ) -> None:
        if successors is None:
            successors = {}
        if predecessors is None:
            predecessors = {}
        if search_map is None:
            search_map = {}
        parent_map = {True: predecessors, False: successors}

        for resource_json in resources:
            kwargs, search_results = self.default_attributes(
                resource_json, attr_map=attr_map, search_map=search_map
            )
            resource_instance = resource_class(**kwargs)
            log.debug(f"Adding {resource_instance.rtdname} to the graph")
            if dump_resource:
                log.debug(f"Resource Dump: {pformat(resource_json)}")

            pr = kwargs.get("_region", self.graph.root)
            log.debug(
                f"Parent resource for {resource_instance.rtdname} automatically set to {pr.rtdname}"
            )
            self.graph.add_resource(pr, resource_instance, edge_type=EdgeType.default)

            def add_deferred_connection(
                search_map: Dict[str, Any],
                search_map_key: str,
                is_parent: bool,
                edge_type: EdgeType,
            ) -> None:
                graph_search = search_map[search_map_key]
                attr = graph_search[0]
                value_name = graph_search[1]
                if value_name in resource_json:
                    value = resource_json[value_name]
                    if isinstance(value, List):
                        values = value
                        for value in values:
                            resource_instance.add_deferred_connection(  # type: ignore
                                attr,
                                value,
                                is_parent,  # type: ignore
                                edge_type=edge_type,
                            )
                    elif isinstance(value, str):
                        resource_instance.add_deferred_connection(  # type: ignore
                            attr, value, is_parent, edge_type=edge_type  # type: ignore
                        )
                    else:
                        log.error(
                            (
                                "Unable to add deferred connection for"
                                f" value {value} of type {type(value)}"
                            )
                        )

            def add_edge(search_map_key: str, is_parent: bool) -> None:
                srs = search_results[search_map_key]
                for sr in srs:
                    if is_parent:
                        src = sr
                        dst = resource_instance
                    else:
                        src = resource_instance
                        dst = sr
                    self.graph.add_edge(src, dst, edge_type=edge_type)

            for is_parent, edge_sr_names in parent_map.items():
                for edge_type, search_result_names in edge_sr_names.items():
                    for search_result_name in search_result_names:
                        if search_result_name in search_results:
                            add_edge(search_result_name, is_parent)
                        elif search_result_name in search_map:
                            add_deferred_connection(
                                search_map, search_result_name, is_parent, edge_type
                            )
                        else:
                            log.error(
                                f"Key {search_result_name} is missing in search_map"
                            )

    @metrics_collect_intances.time()  # type: ignore
    def collect_instances(self) -> None:
        instances = self.client.list_droplets()

        def get_image(droplet: Json) -> Json:
            image = droplet["image"]
            image["region"] = droplet["region"]["slug"]
            return cast(Json, image)

        def remove_duplicates(images: List[Json]) -> List[Json]:
            seen_ids = set()
            unique_images = []
            for image in images:
                if image["id"] not in seen_ids:
                    unique_images.append(image)
                    seen_ids.add(image["id"])
            return unique_images

        images = [get_image(instance) for instance in instances]
        images = remove_duplicates(images)

        self.collect_resource(
            images,
            resource_class=DigitalOceanImage,
            attr_map={
                "id": lambda i: str(i["id"]),
                "urn": lambda i: image_id(i["id"]),
                "distribution": "distribution",
                "image_slug": "slug",
                "is_public": "public",
                "min_disk_size": "min_disk_size",
                "image_type": "type",
                "size_gigabytes": lambda image: int(
                    math.ceil(image.get("size_gigabytes"))
                ),
                "description": "description",
                "image_status": "status",
            },
            search_map={
                "_region": ["urn", lambda image: region_id(image["region"])],
                "__tags": [
                    "urn",
                    lambda image: list(
                        map(lambda tag: tag_id(tag), image.get("tags", []) or [])
                    ),
                ],
            },
            predecessors={
                EdgeType.default: ["__tags"],
            },
        )
        self.collect_resource(
            instances,
            resource_class=DigitalOceanDroplet,
            attr_map={
                "id": lambda i: str(i["id"]),
                "urn": lambda d: droplet_id(d["id"]),
                "instance_status": "status",
                "instance_cores": "vcpus",
                "instance_memory": lambda d: d["memory"] / 1024.0,
                "droplet_backup_ids": lambda d: list(
                    map(str, d.get("backup_ids", []) or [])
                ),
                "is_locked": "locked",
                "droplet_features": "features",
                "droplet_image": lambda d: d["image"]["slug"],
            },
            search_map={
                "_region": [
                    "urn",
                    lambda droplet: region_id(droplet["region"]["slug"]),
                ],
                "__vpcs": ["urn", lambda droplet: vpc_id(droplet["vpc_uuid"])],
                "__images": ["urn", lambda droplet: image_id(droplet["image"]["id"])],
                "__tags": [
                    "urn",
                    lambda d: list(map(lambda tag: tag_id(tag), d.get("tags", []))),
                ],
            },
            predecessors={
                EdgeType.default: ["__vpcs", "__images", "__tags"],
                EdgeType.delete: ["__vpcs"],
            },
        )
    @metrics_collect_regions.time()  # type: ignore
    def collect_regions(self) -> None:
        regions = self.client.list_regions()
        self.collect_resource(
            regions,
            resource_class=DigitalOceanRegion,
            attr_map={
                "id": "slug",
                "urn": lambda r: region_id(r["slug"]),
                "name": "name",
                "do_region_slug": "slug",
                "do_region_features": "features",
                "is_available": "available",
                "do_region_droplet_sizes": "sizes",
            },
        )

    @metrics_collect_volumes.time()  # type: ignore
    def collect_volumes(self) -> None:
        volumes = self.client.list_volumes()

        def extract_volume_status(volume: Json) -> str:
            in_use = len(volume.get("droplet_ids", []) or []) > 0
            return "in-use" if in_use else "available"

        self.collect_resource(
            volumes,
            resource_class=DigitalOceanVolume,
            attr_map={
                "id": "id",
                "urn": lambda r: volume_id(r["id"]),
                "volume_size": "size_gigabytes",
                "description": "description",
                "filesystem_type": "filesystem_type",
                "filesystem_label": "filesystem_label",
                "volume_status": extract_volume_status,
            },
            search_map={
                "__users": [
                    "urn",
                    lambda vol: list(
                        map(lambda id: droplet_id(id), vol["droplet_ids"])
                    ),
                ],
                "__tags": [
                    "urn",
                    lambda v: list(map(lambda tag: tag_id(tag), v.get("tags", []))),
                ],
            },
            predecessors={EdgeType.default: ["__users", "__tags"]},
            successors={EdgeType.delete: ["__users"]},
        )
    @metrics_collect_databases.time()  # type: ignore
    def collect_databases(self) -> None:
        # this mapping was taken from the digitalocean web console
        dbtype_to_size = {
            "db-s-1vcpu-1gb": 10,
            "db-s-1vcpu-2gb": 25,
            "db-s-2vcpu-4gb": 38,
            "db-s-4vcpu-8gb": 115,
            "db-s-6vcpu-16gb": 270,
            "db-s-8vcpu-32gb": 580,
            "db-s-16vcpu-64gb": 1012,
            "gd-2vcpu-8gb": 25,
            "gd-4vcpu-16gb": 60,
            "gd-8vcpu-32gb": 145,
            "gd-16vcpu-64gb": 325,
            "gd-32vcpu-128gb": 695,
            "gd-40vcpu-160gb": 875,
            "so1_5-2vcpu-16gb": 400,
            "so1_5-4vcpu-32gb": 845,
            "so1_5-8vcpu-64gb": 1680,
            "so1_5-16vcpu-128gb": 3410,
            "so1_5-24vcpu-192gb": 5140,
            "so1_5-32vcpu-256gb": 6860,
        }

        databases = self.client.list_databases()
        self.collect_resource(
            databases,
            resource_class=DigitalOceanDatabase,
            attr_map={
                "id": "id",
                "urn": lambda db: database_id(db["id"]),
                "name": lambda db: database_id(db["name"]),
                "db_type": "engine",
                "db_status": "status",
                "db_version": "version",
                "db_endpoint": lambda db: db.get("connection", {}).get("host", ""),
                "instance_type": "size",
                "volume_size": lambda db: dbtype_to_size.get(db.get("size", ""), 0),
            },
            search_map={
                "_region": ["urn", lambda db: region_id(db["region"])],
                "__vpcs": ["urn", lambda db: vpc_id(db["private_network_uuid"])],
                "__tags": [
                    "urn",
                    lambda db: list(
                        map(lambda tag: tag_id(tag), db.get("tags", []) or [])
                    ),
                ],
            },
            predecessors={
                EdgeType.default: ["__vpcs", "__tags"],
                EdgeType.delete: ["__vpcs"],
            },
        )

    @metrics_collect_vpcs.time()  # type: ignore
    def collect_vpcs(self) -> None:
        vpcs = self.client.list_vpcs()
        self.collect_resource(
            vpcs,
            resource_class=DigitalOceanVPC,
            attr_map={
                "id": "id",
                "urn": "urn",
                "ip_range": "ip_range",
                "description": "description",
                "is_default": "default",
            },
            search_map={
                "_region": ["urn", lambda vpc: region_id(vpc["region"])],
            },
        )

    @metrics_collect_projects.time()  # type: ignore
    def collect_projects(self) -> None:
        def get_resource_id(resource: Json) -> str:
            return cast(str, resource["urn"])

        projects = self.client.list_projects()
        project_resources = [
            list(map(get_resource_id, self.client.list_project_resources(p["id"])))
            for p in projects
        ]
        for project, resource_ids in zip(projects, project_resources):
            project["resource_ids"] = resource_ids

        self.collect_resource(
            projects,
            resource_class=DigitalOceanProject,
            attr_map={
                "id": "id",
                "urn": lambda p: project_id(p["id"]),
                "owner_uuid": "owner_uuid",
                "owner_id": lambda p: str(p["owner_id"]),
                "description": "description",
                "purpose": "purpose",
                "environment": "environment",
                "is_default": "is_default",
            },
            search_map={
                "__resources": ["urn", lambda p: p["resource_ids"]],
            },
            successors={
                EdgeType.default: ["__resources"],
                EdgeType.delete: ["__resources"],
            },
        )

    @metrics_collect_k8s_clusters.time()  # type: ignore
    def collect_k8s_clusters(self) -> None:
        clusters = self.client.list_kubernetes_clusters()
        self.collect_resource(
            clusters,
            resource_class=DigitalOceanKubernetesCluster,
            attr_map={
                "id": "id",
                "urn": lambda c: kubernetes_id(c["id"]),
                "k8s_version": "version",
                "k8s_cluster_subnet": "cluster_subnet",
                "k8s_service_subnet": "service_subnet",
                "ipv4_address": "ipv4",
                "endpoint": "endpoint",
                "auto_upgrade_enabled": "auto_upgrade",
                "cluster_status": lambda c: c["status"]["state"],
                "surge_upgrade_enabled": "surge_upgrade",
                "registry_enabled": "registry_enabled",
                "ha_enabled": "ha",
            },
            search_map={
                "_region": ["urn", lambda c: region_id(c["region"])],
                "__nodes": [
                    "urn",
                    lambda cluster: [
                        droplet_id(node["droplet_id"])
                        for node_pool in cluster["node_pools"]
                        for node in node_pool["nodes"]
                    ],
                ],
                "__vpcs": ["urn", lambda c: vpc_id(c["vpc_uuid"])],
            },
            successors={
                EdgeType.default: ["__nodes"],
                EdgeType.delete: ["__nodes"],
            },
            predecessors={
                EdgeType.default: ["__vpcs"],
                EdgeType.delete: ["__vpcs"],
            },
        )
    @metrics_collect_snapshots.time()  # type: ignore
    def collect_snapshots(self) -> None:
        def get_resource_id(snapshot: Json) -> str:
            if snapshot["resource_type"] == "droplet":
                return droplet_id(snapshot["resource_id"])
            else:
                return volume_id(snapshot["resource_id"])

        snapshots = self.client.list_snapshots()
        self.collect_resource(
            snapshots,
            resource_class=DigitalOceanSnapshot,
            attr_map={
                "id": lambda s: str(s["id"]),
                "urn": lambda s: snapshot_id(s["id"]),
                "volume_size": lambda vol: vol["min_disk_size"],
                "snapshot_size_gigabytes": lambda vol: int(
                    math.ceil(vol.get("size_gigabytes"))
                ),
                "resource_id": "resource_id",
                "resource_type": "resource_type",
            },
            search_map={
                "_region": [
                    "urn",
                    lambda s: [region_id(region) for region in s["regions"]],
                ],
                "__resource": ["urn", lambda s: get_resource_id(s)],
                "__tags": [
                    "urn",
                    lambda s: list(
                        map(lambda tag: tag_id(tag), s.get("tags", []) or [])
                    ),
                ],
            },
            predecessors={EdgeType.default: ["__resource", "__tags"]},
        )

    @metrics_collect_load_balancers.time()  # type: ignore
    def collect_load_balancers(self) -> None:
        loadbalancers = self.client.list_load_balancers()

        def get_nr_nodes(lb: Json) -> int:
            size_to_nr_nodes = {
                "lb-small": 1,
                "lb-medium": 3,
                "lb-large": 3,
            }
            if lb["size_unit"]:
                return cast(int, lb["size_unit"])
            else:
                return size_to_nr_nodes.get(lb["size"], 1)

        self.collect_resource(
            loadbalancers,
            resource_class=DigitalOceanLoadBalancer,
            attr_map={
                "id": "id",
                "urn": lambda lb: loadbalancer_id(lb["id"]),
                "public_ip_address": "ip",
                "nr_nodes": get_nr_nodes,
                "loadbalancer_status": "status",
                "redirect_http_to_https": "redirect_http_to_https",
                "enable_proxy_protocol": "enable_proxy_protocol",
                "enable_backend_keepalive": "enable_backend_keepalive",
                "disable_lets_encrypt_dns_records": "disable_lets_encrypt_dns_records",
            },
            search_map={
                "_region": ["urn", lambda lb: region_id(lb["region"]["slug"])],
                "__vpcs": ["urn", lambda lb: vpc_id(lb["vpc_uuid"])],
                "__droplets": [
                    "urn",
                    lambda lb: list(
                        map(lambda id: droplet_id(id), lb.get("droplet_ids", []) or [])
                    ),
                ],
            },
            predecessors={
                EdgeType.default: ["__vpcs"],
                EdgeType.delete: ["__vpcs"],
            },
            successors={EdgeType.default: ["__droplets"]},
        )

    @metrics_collect_floating_ips.time()  # type: ignore
    def collect_floating_ips(self) -> None:
        floating_ips = self.client.list_floating_ips()
        self.collect_resource(
            floating_ips,
            resource_class=DigitalOceanFloatingIP,
            attr_map={
                "id": "ip",
                "urn": lambda ip: floatingip_id(ip["ip"]),
                "ip_address": "ip",
                "ip_address_family": lambda ip: "ipv4",
                "is_locked": "locked",
            },
            search_map={
                "_region": ["urn", lambda ip: region_id(ip["region"]["slug"])],
                "__droplet": [
                    "urn",
                    lambda ip: droplet_id(ip.get("droplet", {}).get("id", "")),
                ],
            },
            predecessors={EdgeType.default: ["__droplet"]},
        )

    @metrics_collect_spaces.time()  # type: ignore
    def collect_spaces(self, region: DigitalOceanRegion) -> None:
        spaces = self.client.list_spaces(region.do_region_slug or "")
        self.collect_resource(
            spaces,
            resource_class=DigitalOceanSpace,
            attr_map={
                "id": "Name",
                "urn": lambda space: space_id(space["Name"]),
                "name": "Name",
                "ctime": "CreationDate",
            },
            search_map={
                "_region": [
                    "urn",
                    lambda space: region_id(region.do_region_slug or ""),
                ],
            },
        )

    @metrics_collect_apps.time()  # type: ignore
    def collect_apps(self) -> None:
        apps = self.client.list_apps()

        def extract_region(app: Json) -> Optional[str]:
            region_slug = next(
                iter(app.get("region", {}).get("data_centers", [])), None
            )
            if region_slug is None:
                return None
            return region_id(region_slug)

        def extract_databases(app: Json) -> List[str]:
            databases = app.get("spec", {}).get("databases", [])
            names = [database_id(database["name"]) for database in databases]
            return names

        self.collect_resource(
            apps,
            resource_class=DigitalOceanApp,
            attr_map={
                "id": "id",
                "urn": lambda app: app_id(app["id"]),
                "tier_slug": "tier_slug",
                "default_ingress": "default_ingress",
                "live_url": "live_url",
                "live_url_base": "live_url_base",
                "live_domain": "live_domain",
            },
            search_map={
                "_region": ["urn", extract_region],
                "__databases": ["name", extract_databases],
            },
            predecessors={EdgeType.default: ["__databases"]},
        )
{}).get("databases", []) names = [database_id(database["name"]) for database in databases] return names self.collect_resource( apps, resource_class=DigitalOceanApp, attr_map={ "id": "id", "urn": lambda app: app_id(app["id"]), "tier_slug": "tier_slug", "default_ingress": "default_ingress", "live_url": "live_url", "live_url_base": "live_url_base", "live_domain": "live_domain", }, search_map={ "_region": ["urn", extract_region], "__databases": ["name", extract_databases], }, predecessors={EdgeType.default: ["__databases"]}, ) @metrics_collect_cdn_endpoints.time() # type: ignore def collect_cdn_endpoints(self) -> None: endpoints = self.client.list_cdn_endpoints() self.collect_resource( endpoints, resource_class=DigitalOceanCdnEndpoint, attr_map={ "id": "id", "urn": lambda endpoint: cdn_endpoint_id(endpoint["id"]), "origin": "origin", "endpoint": "endpoint", "certificate_id": "certificate_id", "custom_domain": "custom_domain", "ttl": "ttl", }, ) @metrics_collect_certificates.time() # type: ignore def collect_certificates(self) -> None: certificates = self.client.list_certificates() self.collect_resource( certificates, resource_class=DigitalOceanCertificate, attr_map={ "id": "id", "urn": lambda c: certificate_id(c["id"]), "expires": lambda c: iso2datetime(c.get("not_after")), "sha1_fingerprint": "sha1_fingerprint", "dns_names": "dns_names", "certificate_state": "state", "certificate_type": "type", }, ) @metrics_collect_container_registry.time() # type: ignore def collect_container_registry(self) -> None: registries = self.client.get_registry_info() for registry in registries: registry["updated_at"] = registry["storage_usage_updated_at"] self.collect_resource( [registry], resource_class=DigitalOceanContainerRegistry, attr_map={ "id": "name", "urn": lambda r: container_registry_id(r["name"]), "storage_usage_bytes": "storage_usage_bytes", "is_read_only": "read_only", }, search_map={ "_region": ["urn", lambda registry: region_id(registry["region"])], }, ) repositories = self.client.list_registry_repositories( registry["name"]) self.collect_resource( repositories, resource_class=DigitalOceanContainerRegistryRepository, attr_map={ "id": "name", "urn": lambda r: container_registry_repository_id( r["registry_name"], r["name"]), "name": "name", "tag_count": "tag_count", "manifest_count": "manifest_count", }, search_map={ "__registry": [ "urn", lambda r: container_registry_id(r["registry_name"]), ], }, predecessors={EdgeType.default: ["__registry"]}, ) tags = [ tag for repository in repositories for tag in self.client.list_registry_repository_tags( registry["name"], repository["name"]) ] self.collect_resource( tags, resource_class=DigitalOceanContainerRegistryRepositoryTag, attr_map={ "id": "tag", "urn": lambda t: container_registry_repository_tag_id( t["registry_name"], t["repository"], t["tag"]), "registry_name": "registry_name", "repository_name": "repository", "name": "tag", "manifest_digest": "manifest_digest", "compressed_size_bytes": "compressed_size_bytes", "size_bytes": "size_bytes", }, search_map={ "__repository": [ "urn", lambda t: container_registry_repository_id( t["registry_name"], t["repository"]), ], "__registry": [ "urn", lambda t: container_registry_id(t["registry_name"]), ], }, predecessors={ EdgeType.default: ["__repository", "__registry"] }, ) @metrics_collect_ssh_keys.time() # type: ignore def collect_ssh_keys(self) -> None: ssh_keys = self.client.list_ssh_keys() self.collect_resource( ssh_keys, resource_class=DigitalOceanSSHKey, attr_map={ "id": lambda k: str(k["id"]), "urn": lambda 
                "public_key": "public_key",
                "fingerprint": "fingerprint",
            },
        )

    @metrics_collect_tags.time()  # type: ignore
    def collect_tags(self) -> None:
        tags = self.client.list_tags()
        self.collect_resource(
            tags,
            resource_class=DigitalOceanTag,
            attr_map={
                "id": "name",
                "urn": lambda t: tag_id(t["name"]),
            },
        )

    @metrics_collect_domains.time()  # type: ignore
    def collect_domains(self) -> None:
        domains = self.client.list_domains()
        self.collect_resource(
            domains,
            resource_class=DigitalOceanDomain,
            attr_map={
                "id": "name",
                "urn": lambda d: domain_id(d["name"]),
                "ttl": "ttl",
                "zone_file": "zone_file",
            },
        )

        def update_record(record: Json, domain: Json) -> Json:
            record["domain_name"] = domain["name"]
            return record

        domain_records = [
            update_record(record, domain)
            for domain in domains
            for record in self.client.list_domain_records(domain["name"])
        ]
        self.collect_resource(
            domain_records,
            resource_class=DigitalOceanDomainRecord,
            attr_map={
                "id": lambda r: str(r["id"]),
                "name": "name",
                "urn": lambda r: domain_record_id(r["id"]),
                "domain_name": "domain_name",
                "record_type": "type",
                "record_data": "data",
                "record_priority": "priority",
                "record_port": "port",
                "record_ttl": "ttl",
                "record_weight": "weight",
                "record_flags": "flags",
                "record_tag": "tag",
            },
            search_map={
                "__domain": ["urn", lambda r: domain_id(r["domain_name"])],
            },
            predecessors={EdgeType.default: ["__domain"]},
        )

    @metrics_collect_firewalls.time()  # type: ignore
    def collect_firewalls(self) -> None:
        firewalls = self.client.list_firewalls()
        self.collect_resource(
            firewalls,
            resource_class=DigitalOceanFirewall,
            attr_map={
                "id": "id",
                "urn": lambda f: firewall_id(f["id"]),
                "firewall_status": "status",
            },
            search_map={
                "__droplets": [
                    "urn",
                    lambda f: list(
                        map(lambda id: droplet_id(id), f.get("droplet_ids", []) or [])
                    ),
                ],
                "__tags": [
                    "urn",
                    lambda f: list(
                        map(lambda id: tag_id(id), f.get("tags", []) or [])
                    ),
                ],
            },
            predecessors={
                EdgeType.default: ["__tags"],
            },
            successors={
                EdgeType.default: ["__droplets"],
            },
        )

    @metrics_collect_alert_policies.time()  # type: ignore
    def collect_alert_policies(self) -> None:
        alert_policies = self.client.list_alert_policies()
        self.collect_resource(
            alert_policies,
            resource_class=DigitalOceanAlertPolicy,
            attr_map={
                "id": "uuid",
                "urn": lambda ap: alert_policy_id(ap["uuid"]),
                "description": "description",
                "policy_type": "type",
                "is_enabled": "enabled",
            },
        )
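
# Usage sketch (assumes a configured StreamingWrapper `do_client`, as in
# prepare_graph() above):
def collect_team_example(do_client: StreamingWrapper) -> Graph:
    team = DigitalOceanTeam(id="test_team", urn="do:team:test_team")
    collector = DigitalOceanTeamCollector(team, do_client)
    collector.collect()
    return collector.graph
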
def __init__(self) -> None:
    super().__init__()
    self.name = str(self.cloud)
    cloud = Cloud(self.cloud)
    self.root = cloud
    self.graph = Graph(root=self.root)