Example #1
    def collect(cls, api_client: client.ApiClient, graph: Graph):
        for response in cls.list(api_client):
            kwargs, search_results = default_attributes(
                response, cls.attr_map, cls.search_map, graph)
            parent = graph.root
            namespace = response.metadata.namespace
            resource = cls(**kwargs)
            if namespace:
                ns = graph.search_first_all({
                    "kind": "kubernetes_namespace",
                    "name": namespace
                })
                if ns:
                    parent = ns
            log.debug(f"Collected {resource.rtdname} in {parent.rtdname}")
            graph.add_resource(parent, resource)

            parent_map = {
                True: cls.predecessor_names,
                False: cls.successor_names
            }

            for is_parent, sr_names in parent_map.items():
                for sr_name in sr_names:
                    if sr_name in search_results:
                        srs = search_results[sr_name]
                        for sr in srs:
                            if is_parent:
                                src = sr
                                dst = resource
                            else:
                                src = resource
                                dst = sr
                            graph.add_edge(src, dst)
                    else:
                        if sr_name in cls.search_map:
                            graph_search = cls.search_map[sr_name]
                            attr = graph_search[0]
                            value_name = graph_search[1]
                            value = get_response_data(response, value_name)
                            if value:
                                if isinstance(value, list):
                                    values = value
                                    for value in values:
                                        resource.add_deferred_connection(
                                            {attr: value}, is_parent)
                                elif isinstance(value, str):
                                    resource.add_deferred_connection(
                                        {attr: value}, is_parent)
                                else:
                                    log.error((
                                        "Unable to add deferred connection for"
                                        f" value {value} of type {type(value)}"
                                    ))
                        else:
                            log.error(
                                f"Key {sr_name} is missing in search_map")
            post_process = getattr(cls, "post_process", None)
            if callable(post_process):
                post_process(resource, graph)
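The collect() above is driven entirely by class-level maps. A minimal sketch of how a resource class might declare them, assuming the shapes consumed above; the class, attribute names, and response paths are hypothetical and not taken from the actual plugin:

# Hypothetical resource class illustrating the maps collect() consumes;
# none of these names come from the real Kubernetes plugin.
class KubernetesExamplePod:
    # resource attribute -> path into the API response object
    attr_map = {"pod_status": "status.phase"}
    # search-result key -> [graph attribute to search on, response path of the value]
    search_map = {"__node": ["name", "spec.node_name"]}
    # keys whose matches become parents (edge points at this resource)
    predecessor_names = ["__node"]
    # keys whose matches become children (edge points away from this resource)
    successor_names = []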
Example #2
def test_baseresource_chksum():
    g = Graph()
    a = SomeTestResource("a", {})
    with pytest.raises(RuntimeError):
        a.chksum
    g.add_node(a)
    assert isinstance(a.chksum, str)
Example #3
def test_graph_export_iterator():
    g = Graph(root=GraphRoot("root", {}))
    a = SomeTestResource("a", {})
    g.add_resource(g.root, a)
    assert getrefcount(g) == 2
    gei = GraphExportIterator(g)
    assert getrefcount(g) == 3
    gei.export_graph()
    assert getrefcount(g) == 2
    assert len(list(gei)) == 3
Example #4
    def collect(self) -> None:
        """Runs the actual resource collection across all resource collectors.

        Resource collectors add their resources to the local `self.graph` graph.
        """
        log.info("Collecting DigitalOcean resources for team %s", self.team.id)

        self.graph = Graph(root=self.team)
        collectors = set(self.collector_set)

        log.debug((f"Running the following collectors in {self.team.rtdname}:"
                   f" {', '.join(collectors)}"))

        for collector_name, collector in self.mandatory_collectors:
            if collector_name in collectors:
                log.info(f"Collecting {collector_name} in {self.team.rtdname}")
                collector()

        regions = [
            r for r in self.graph.nodes if isinstance(r, DigitalOceanRegion)
        ]

        for region in regions:
            for collector_name, collector in self.region_collectors:
                if collector_name in collectors:
                    log.info(
                        (f"Collecting {collector_name} in {region.rtdname}"
                         f" {self.team.rtdname}"))
                    collector(region=region)

        for collector_name, collector in self.global_collectors:
            if collector_name in collectors:
                log.info(f"Collecting {collector_name} in {self.team.rtdname}")
                collector()

        remove_nodes = set()

        def rmnodes(cls: Any) -> None:
            for node in self.graph.nodes:
                if isinstance(node, cls) and not any(
                        True for _ in self.graph.successors(node)):
                    remove_nodes.add(node)
            for node in remove_nodes:
                self.graph.remove_node(node)
            log.debug(
                f"Removing {len(remove_nodes)} unreferenced nodes of type {cls}"
            )
            remove_nodes.clear()

        # Since the regions API returns all available regions, we need to
        # remove the ones that are not used by any resources.
        rmnodes(DigitalOceanRegion)
Example #5
def test_graph_container():
    gc = GraphContainer(cache_graph=False)
    g = Graph()
    n1 = SomeTestResource("foo", {})
    n2 = SomeTestResource("bar", {})
    g.add_node(n1)
    gc.graph.add_resource(gc.GRAPH_ROOT, n2)
    gc.add(g)
    gc.graph.add_edge(n1, n2)
    assert len(gc.graph.nodes) == 3
    assert len(gc.graph.edges) == 2
    assert gc.graph.search_first("id", "bar") == n2
    assert gc.graph.search_first_parent_class(n2, SomeTestResource) == n1
Example #6
    def graph(self, query: str) -> Graph:
        def process_data_line(data: dict, graph: Graph):
            """Process a single line of resotocore graph data"""

            if data.get("type") == "node":
                node_id = data.get("id")
                node = node_from_dict(data)
                node_mapping[node_id] = node
                log.debug(f"Adding node {node} to the graph")
                graph.add_node(node)
                if node.kind == "graph_root":
                    log.debug(f"Setting graph root {node}")
                    graph.root = node
            elif data.get("type") == "edge":
                node_from = data.get("from")
                node_to = data.get("to")
                edge_type = EdgeType.from_value(data.get("edge_type"))
                if node_from not in node_mapping or node_to not in node_mapping:
                    raise ValueError(f"One of {node_from} -> {node_to} unknown")
                graph.add_edge(
                    node_mapping[node_from], node_mapping[node_to], edge_type=edge_type
                )

        graph = Graph()
        node_mapping = {}
        for data in self.query(query):
            try:
                process_data_line(data, graph)
            except ValueError as e:
                log.error(e)
                continue
        sanitize(graph)
        return graph
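graph() consumes one JSON object per line from self.query(). A hedged sketch of the two line shapes process_data_line() handles, with made-up ids:

# Illustrative line shapes with made-up ids; a real node line also carries
# the payload that node_from_dict() needs to build the resource object.
node_line = {"type": "node", "id": "n1"}
edge_line = {"type": "edge", "from": "n1", "to": "n2", "edge_type": "default"}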
Example #7
def update_model(graph: Graph,
                 resotocore_base_uri: str,
                 dump_json: bool = False,
                 tempdir: Optional[str] = None) -> None:
    model_uri = f"{resotocore_base_uri}/model"

    log.debug(f"Updating model via {model_uri}")

    model_json = json.dumps(graph.export_model(), indent=4)

    if dump_json:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        with tempfile.NamedTemporaryFile(
                prefix=f"resoto-model-{ts}-",
                suffix=".json",
                delete=not dump_json,
                dir=tempdir,
        ) as model_outfile:
            log.info(f"Writing model json to file {model_outfile.name}")
            model_outfile.write(model_json.encode())

    headers = {
        "Content-Type": "application/json",
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)

    r = requests.patch(model_uri, data=model_json, headers=headers)
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to update model: {r.content}")
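A hedged usage sketch for update_model(); the resotocore URI is a placeholder that assumes a locally running instance:

# Hypothetical call site: push the model of a freshly collected graph
# to resotocore before exporting the graph itself.
graph = Graph(root=GraphRoot("root", {}))
update_model(graph, "https://localhost:8900", dump_json=False, tempdir="/tmp")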
Example #8
def test_resotocore():

    recorded_headers: Dict[str, str] = {}

    def make_query(request: requests.Request) -> requests.Response:
        nonlocal recorded_headers
        recorded_headers = request.headers
        resp = requests.Response()
        resp.status_code = 200
        resp._content = str.encode(json.dumps("OK"))
        return resp

    config = cast(
        Config,
        FakeConfig(
            values={
                "resotoworker": {
                    "graph": "resoto",
                    "debug_dump_json": False,
                    "tempdir": "/tmp",
                    "graph_merge_kind": "foo_kind",
                },
                "running_config": None,
            }
        ),
    )

    core = Resotocore(make_query, config)

    core.send_to_resotocore(Graph(), "task_123")
    print(recorded_headers)

    assert recorded_headers["Resoto-Worker-Task-Id"] == "task_123"
Example #9
def prepare_graph(do_client) -> Graph:
    cloud = Cloud("do")
    team = DigitalOceanTeam(id="test_team", urn="do:team:test_team")
    plugin_instance = DigitalOceanTeamCollector(team, do_client)
    plugin_instance.collect()
    cloud_graph = Graph(root=cloud)
    graph = Graph(root=GraphRoot("root", {}))
    cloud_graph.merge(plugin_instance.graph)
    graph.merge(cloud_graph)
    sanitize(graph)
    return graph
Example #10
        def collect(collectors: List[BaseCollectorPlugin]) -> Optional[Graph]:
            graph = Graph(root=GraphRoot("root", {}))

            max_workers = min(len(collectors),
                              self._config.resotoworker.pool_size)
            if max_workers == 0:
                log.error(
                    "No workers configured or no collector plugins loaded - skipping collect"
                )
                return
            pool_args = {"max_workers": max_workers}
            if self._config.resotoworker.fork_process:
                pool_args["mp_context"] = multiprocessing.get_context("spawn")
                pool_args["initializer"] = resotolib.proc.initializer
                pool_executor = futures.ProcessPoolExecutor
                collect_args = {
                    "args": ArgumentParser.args,
                    "running_config": self._config.running_config,
                }
            else:
                pool_executor = futures.ThreadPoolExecutor
                collect_args = {}

            with pool_executor(**pool_args) as executor:
                wait_for = [
                    executor.submit(
                        collect_plugin_graph,
                        collector,
                        **collect_args,
                    ) for collector in collectors
                ]
                for future in futures.as_completed(wait_for):
                    cluster_graph = future.result()
                    if not isinstance(cluster_graph, Graph):
                        log.error(
                            f"Skipping invalid cluster_graph {type(cluster_graph)}"
                        )
                        continue
                    graph.merge(cluster_graph)
            sanitize(graph)
            return graph
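The fork path above relies on ProcessPoolExecutor accepting a spawn context and a per-worker initializer. A standalone sketch of that standard-library pattern; the initializer here is only a stand-in for resotolib.proc.initializer:

import multiprocessing
from concurrent import futures


def pool_initializer() -> None:
    # stand-in for resotolib.proc.initializer: runs once per worker process
    print("worker started")


def square(n: int) -> int:
    return n * n


if __name__ == "__main__":
    with futures.ProcessPoolExecutor(
        max_workers=2,
        mp_context=multiprocessing.get_context("spawn"),
        initializer=pool_initializer,
    ) as executor:
        jobs = [executor.submit(square, i) for i in range(4)]
        print(sorted(f.result() for f in futures.as_completed(jobs)))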
Example #11
    def __init__(self, cluster: KubernetesCluster,
                 cluster_config: client.Configuration) -> None:
        """
        Args:
            cluster: The K8S cluster resource object this cluster collector
                is going to collect.
        """
        self.cluster = cluster
        self.config = cluster_config
        self.api_client = client.ApiClient(self.config)
        self.graph = Graph(root=self.cluster)
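A hedged sketch of building the cluster_config argument, assuming the standard kubernetes Python client; the host and token values are placeholders:

from kubernetes import client

# Placeholder connection details; a real collector would load these from a
# kubeconfig or an in-cluster service account instead.
cluster_config = client.Configuration()
cluster_config.host = "https://kubernetes.example.com:6443"
cluster_config.api_key = {"authorization": "Bearer <token>"}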
Example #12
    def update_users_groups_channels(self, graph: Graph):
        log.debug("Updating Users Groups and Channels")
        tmp_users = {}
        tmp_emails = {}
        tmp_usergroups = {}
        tmp_channels = {}
        for user in graph.search("kind", "slack_user"):
            tmp_users[user.name] = user
            if user.email:
                tmp_emails[user.email] = user
        for usergroup in graph.search("kind", "slack_usergroup"):
            if usergroup.is_usergroup:
                tmp_usergroups[usergroup.name] = usergroup
        for channel in graph.search("kind", "slack_conversation"):
            if channel.is_channel:
                tmp_channels[channel.name] = channel
        self.users2id = tmp_users
        self.emails2id = tmp_emails
        self.usergroups2id = tmp_usergroups
        self.channels2id = tmp_channels
Example #13
        def process_data_line(data: dict, graph: Graph):
            """Process a single line of resotocore graph data"""

            if data.get("type") == "node":
                node_id = data.get("id")
                node = node_from_dict(data)
                node_mapping[node_id] = node
                log.debug(f"Adding node {node} to the graph")
                graph.add_node(node)
                if node.kind == "graph_root":
                    log.debug(f"Setting graph root {node}")
                    graph.root = node
            elif data.get("type") == "edge":
                node_from = data.get("from")
                node_to = data.get("to")
                edge_type = EdgeType.from_value(data.get("edge_type"))
                if node_from not in node_mapping or node_to not in node_mapping:
                    raise ValueError(f"One of {node_from} -> {node_to} unknown")
                graph.add_edge(
                    node_mapping[node_from], node_mapping[node_to], edge_type=edge_type
                )
Example #14
def test_graph():
    g = Graph()
    n1 = SomeTestResource("foo", {})
    n2 = SomeTestResource("bar", {})
    g.add_node(n1)
    g.add_node(n2)
    g.add_edge(n1, n2)
    assert len(g.nodes) == 2
    assert len(g.edges) == 1
Example #15
def default_attributes(
    response, attr_map: Dict, search_map: Dict, graph: Graph
) -> Tuple[Dict, Dict]:
    kwargs = {
        "id": response.metadata.uid,
        "name": response.metadata.name,
        "ctime": response.metadata.creation_timestamp,
        "self_link": response.metadata.self_link,
        "tags": response.metadata.labels if response.metadata.labels else {},
        "_api_response": pformat(response),
    }
    search_results = {}
    for map_to, map_from in attr_map.items():
        data = get_response_data(response, map_from)
        if data is None:
            log.debug(f"Unable to set {map_to}, attribute {map_from} not in result")
            continue
        kwargs[map_to] = data

    for map_to, search_data in search_map.items():
        search_attr = search_data[0]
        search_value_name = search_data[1]
        search_value = get_response_data(response, search_value_name)
        if search_value is None:
            continue
        if isinstance(search_value, list):
            search_values = search_value
        else:
            search_values = [search_value]
        for search_value in search_values:
            search_result = graph.search_first(search_attr, search_value)
            if search_result:
                if map_to not in search_results:
                    search_results[map_to] = []
                search_results[map_to].append(search_result)
        if (
            map_to not in kwargs
            and map_to in search_results
            and not str(map_to).startswith("_")
        ):
            search_result = search_results[map_to]
            if len(search_result) == 1:
                kwargs[map_to] = search_result[0]
            else:
                kwargs[map_to] = list(search_result)

    return kwargs, search_results
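A hedged worked example of the search_map contract above: each value is a [graph attribute, response path] pair, matching nodes are collected into search_results, and keys that do not start with "_" are also copied into kwargs. The paths here are hypothetical:

# Hypothetical search_map and the results default_attributes() would build
# if the graph already contains the referenced nodes.
search_map = {
    "namespace": ["name", "metadata.namespace"],   # also copied into kwargs
    "_owner": ["id", "metadata.owner_uid"],        # underscore key: search only
}
# search_results -> {"namespace": [<namespace node>], "_owner": [<owner node>]}
# kwargs          -> {..., "namespace": <namespace node>}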
Example #16
    def alarm_cleanup(self, graph: Graph):
        log.info("AWS Cloudwatch Alarms cleanup called")
        for node in graph.nodes:
            if node.protected or not isinstance(node, AWSCloudwatchAlarm):
                continue

            cloud = node.cloud(graph)
            account = node.account(graph)
            region = node.region(graph)
            log_prefix = (
                f"Found {node.rtdname} in cloud {cloud.name} account {account.dname} "
                f"region {region.name}.")

            if len(self.config) > 0:
                if (cloud.id not in self.config
                        or account.id not in self.config[cloud.id]):
                    log.debug((
                        f"{log_prefix} Account not found in config - ignoring."
                    ))
                    continue

            should_clean = False
            i = None
            log_msg = log_prefix
            for dimension in node.dimensions:
                if dimension.get("Name") == "InstanceId":
                    instance_id = dimension.get("Value")
                    i = graph.search_first_all({
                        "kind": "aws_ec2_instance",
                        "id": instance_id
                    })
                    if (isinstance(i, AWSEC2Instance)
                            and i.instance_status != "terminated"):
                        should_clean = False
                        break
                    else:
                        should_clean = True
                        log_msg += f" Referenced EC2 instance {instance_id} not found."

            if not should_clean:
                continue
            log.debug(f"{log_msg} - cleaning alarm")
            node.clean = True
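The cloud/account filter above implies a nested config shape; a hedged sketch with placeholder ids:

# Assumed shape of self.config for the filter above: cloud id -> account ids
# whose alarms may be cleaned. The ids are placeholders.
config = {
    "aws": ["111111111111", "222222222222"],
}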
Example #17
def test_multidigraph():
    g = Graph()
    a = SomeTestResource("a", {})
    b = SomeTestResource("b", {})
    c = SomeTestResource("c", {})
    d = SomeTestResource("d", {})
    g.add_resource(a, b)
    g.add_resource(b, c)
    g.add_resource(c, d)
    g.add_edge(b, a, edge_type=EdgeType.delete)
    g.add_edge(b, d, edge_type=EdgeType.delete)
    assert len(g.nodes) == 4
    assert len(g.edges) == 5
    assert len(list(g.successors(a))) == 1
    g.add_edge(a, b)
    assert len(list(g.successors(a))) == 1
    assert len(list(g.predecessors(b))) == 1
    assert len(list(g.predecessors(a))) == 0
    assert len(list(g.successors(b))) == 1
    assert len(list(g.successors(c))) == 1
    assert len(list(g.successors(d))) == 0
    assert len(list(g.predecessors(a, edge_type=EdgeType.delete))) == 1
    assert len(list(g.successors(b, edge_type=EdgeType.delete))) == 2
    assert len(list(g.successors(a, edge_type=EdgeType.delete))) == 0
    assert len(list(g.predecessors(b, edge_type=EdgeType.delete))) == 0
    assert len(list(g.ancestors(a))) == 0
    assert len(list(g.descendants(a))) == 3
    assert len(list(g.descendants(a, edge_type=EdgeType.delete))) == 0
    assert len(list(g.descendants(b))) == 2
    assert len(list(g.descendants(b, edge_type=EdgeType.delete))) == 2
    assert g.is_dag_per_edge_type()
    g.add_edge(b, a)
    assert g.is_dag_per_edge_type() is False
Example #18
def test_graph_merge():
    rg1 = Graph()
    rg2 = Graph()
    a = SomeTestResource("a", {})
    b = SomeTestResource("b", {})
    c = SomeTestResource("c", {})
    d = SomeTestResource("d", {})
    rg1.add_node(a)
    rg1.add_node(b)
    rg2.add_node(c)
    rg2.add_node(d)
    rg1.add_edge(a, b, edge_type=EdgeType.delete)
    rg2.add_edge(c, d, edge_type=EdgeType.delete)
    rg1.merge(rg2)
    assert len(rg1.nodes) == 4
    assert len(rg1.edges) == 2
    for edge in rg1.edges:
        assert len(edge) == 3
        key = edge[2]
        assert len(key) == 3
        edge_type = key[2]
        assert edge_type == EdgeType.delete
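The merge test above relies on edges being reported as (src, dst, key) triples where the key carries the edge type in its last position; a hedged sketch of unpacking that shape:

# Hedged sketch of reading edge types off a merged graph, matching the
# (src, dst, key) tuple shape asserted in the test above.
def edge_types(graph: Graph):
    for _, _, key in graph.edges:
        yield key[2]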
Example #19
class DigitalOceanTeamCollector:
    """Collects a single DigitalOcean team

    Responsible for collecting all the resources of an individual team.
    Builds up its own local graph which is then taken by collect_project()
    and merged with the plugin graph.

    This way we can have many instances of DigitalOceanCollectorPlugin
    running in parallel, all building up individual graphs which in the
    end are merged into a final graph containing all DigitalOcean resources.
    """
    def __init__(self, team: DigitalOceanTeam,
                 client: StreamingWrapper) -> None:
        self.client = client
        self.team = team

        # Mandatory collectors are always collected regardless of whether
        # they were included by --do-collect or excluded by --do-no-collect
        self.mandatory_collectors: List[Tuple[str, Callable[..., None]]] = [
            ("regions", self.collect_regions)
        ]
        # Global collectors are resources that are either specified at a global
        # level (as opposed to per zone or per region) or are zone/region
        # resources that provide an aggregatedList() function returning all
        # resources across all zones/regions.
        self.global_collectors: List[Tuple[str, Callable[..., None]]] = [
            ("tags", self.collect_tags),
            ("vpcs", self.collect_vpcs),
            ("instances", self.collect_instances),
            ("volumes", self.collect_volumes),
            ("databases", self.collect_databases),
            ("k8s_clusters", self.collect_k8s_clusters),
            ("snapshots", self.collect_snapshots),
            ("load_balancers", self.collect_load_balancers),
            ("floating_ips", self.collect_floating_ips),
            ("project", self.collect_projects),
            ("apps", self.collect_apps),
            ("cdn_endpoints", self.collect_cdn_endpoints),
            ("certificates", self.collect_certificates),
            ("container_registry", self.collect_container_registry),
            ("ssh_keys", self.collect_ssh_keys),
            ("domains", self.collect_domains),
            ("firewalls", self.collect_firewalls),
            ("alert_policies", self.collect_alert_policies),
        ]

        self.region_collectors: List[Tuple[str, Callable[..., None]]] = [
            ("spaces", self.collect_spaces),
        ]

        self.all_collectors = dict(self.mandatory_collectors)
        self.all_collectors.update(self.region_collectors)
        self.all_collectors.update(self.global_collectors)
        self.collector_set = set(self.all_collectors.keys())

    def collect(self) -> None:
        """Runs the actual resource collection across all resource collectors.

        Resource collectors add their resources to the local `self.graph` graph.
        """
        log.info("Collecting DigitalOcean resources for team %s", self.team.id)

        self.graph = Graph(root=self.team)
        collectors = set(self.collector_set)

        log.debug((f"Running the following collectors in {self.team.rtdname}:"
                   f" {', '.join(collectors)}"))

        for collector_name, collector in self.mandatory_collectors:
            if collector_name in collectors:
                log.info(f"Collecting {collector_name} in {self.team.rtdname}")
                collector()

        regions = [
            r for r in self.graph.nodes if isinstance(r, DigitalOceanRegion)
        ]

        for region in regions:
            for collector_name, collector in self.region_collectors:
                if collector_name in collectors:
                    log.info(
                        (f"Collecting {collector_name} in {region.rtdname}"
                         f" {self.team.rtdname}"))
                    collector(region=region)

        for collector_name, collector in self.global_collectors:
            if collector_name in collectors:
                log.info(f"Collecting {collector_name} in {self.team.rtdname}")
                collector()

        remove_nodes = set()

        def rmnodes(cls: Any) -> None:
            for node in self.graph.nodes:
                if isinstance(node, cls) and not any(
                        True for _ in self.graph.successors(node)):
                    remove_nodes.add(node)
            for node in remove_nodes:
                self.graph.remove_node(node)
            log.debug(
                f"Removing {len(remove_nodes)} unreferenced nodes of type {cls}"
            )
            remove_nodes.clear()

        # Since the regions API returns all available regions, we need to
        # remove the ones that are not used by any resources.
        rmnodes(DigitalOceanRegion)

    def default_attributes(
        self,
        result: Dict[str, Any],
        attr_map: Dict[str, Any],
        search_map: Dict[str, Any],
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """See a similar method in the GCPCollectorPlugin"""

        # The following are default attributes that are passed to every
        # BaseResource() if found in `result`
        def extract_tags(result: Dict[str, Any]) -> Dict[str, Optional[str]]:
            raw_tags = result.get("tags", [])
            raw_tags = raw_tags if raw_tags else []
            tags = [parse_tag(tag) for tag in raw_tags if tag]
            return dict(tags) if tags else {}

        kwargs = {
            "id": str(result.get("id")),
            "tags": extract_tags(result),
            "name": result.get("name"),
            "ctime": iso2datetime(result.get("created_at")),
            "mtime": iso2datetime(result.get("updated_at")),
            "_account": self.team,
        }

        if attr_map is not None:
            for map_to, attribute_selector in attr_map.items():
                data = get_result_data(result, attribute_selector)
                if data is None:
                    log.debug(f"Attribute {attribute_selector} not in result")
                    continue
                log.debug(f"Found attribute {map_to}: {pformat(data)}")
                kwargs[map_to] = data

        # By default we search for a resource's region and/or zone
        default_search_map: Dict[str, Any] = {}
        search_results: Dict[str, Any] = {}
        if search_map is None:
            search_map = dict(default_search_map)
        else:
            updated_search_map = dict(default_search_map)
            updated_search_map.update(search_map)
            search_map = updated_search_map

        for map_to, search_data in search_map.items():
            search_attr = search_data[0]
            search_value_name = search_data[1]
            search_value = get_result_data(result, search_value_name)
            if search_value is None:
                continue
            if isinstance(search_value, List):
                search_values = search_value
            else:
                search_values = [search_value]
            for search_value in search_values:
                search_result = self.graph.search_first(
                    search_attr, search_value)
                if search_result:
                    if map_to not in search_results:
                        search_results[map_to] = []
                    search_results[map_to].append(search_result)
            if map_to not in kwargs and map_to in search_results and not str(
                    map_to).startswith("__"):
                search_result = search_results[map_to]
                if len(search_result) == 1:
                    kwargs[map_to] = search_result[0]
                else:
                    kwargs[map_to] = list(search_result)

        # If the resource was referencing a zone but not a region we look up its
        # region based on the zone information we found.
        # E.g. if we know a disk is in zone us-central1-a then we can find
        # the region us-central1 from that.
        if "_zone" in kwargs and "_region" not in kwargs and isinstance(
                kwargs["_zone"], BaseResource):
            region = kwargs["_zone"].region(self.graph)
            if region:
                kwargs["_region"] = region
                if "_region" in search_map.keys(
                ) and "_region" not in search_results:
                    search_results["_region"] = region

        return kwargs, search_results

    def collect_resource(
        self,
        resources: List[Json],
        resource_class: Type[BaseResource],
        attr_map: Dict[str, Any],
        search_map: Optional[Dict[str, Any]] = None,
        successors: Optional[Dict[EdgeType, List[str]]] = None,
        predecessors: Optional[Dict[EdgeType, List[str]]] = None,
        dump_resource: bool = False,
    ) -> None:

        if successors is None:
            successors = {}
        if predecessors is None:
            predecessors = {}
        if search_map is None:
            search_map = {}
        parent_map = {True: predecessors, False: successors}

        for resource_json in resources:
            kwargs, search_results = self.default_attributes(
                resource_json, attr_map=attr_map, search_map=search_map)
            resource_instance = resource_class(**kwargs)
            log.debug(f"Adding {resource_instance.rtdname} to the graph")
            if dump_resource:
                log.debug(f"Resource Dump: {pformat(resource_json)}")

            pr = kwargs.get("_region", self.graph.root)
            log.debug(
                f"Parent resource for {resource_instance.rtdname} automatically set to {pr.rtdname}"
            )
            self.graph.add_resource(pr,
                                    resource_instance,
                                    edge_type=EdgeType.default)

            def add_deferred_connection(
                search_map: Dict[str, Any],
                search_map_key: str,
                is_parent: bool,
                edge_type: EdgeType,
            ) -> None:
                graph_search = search_map[search_map_key]
                attr = graph_search[0]
                value_name = graph_search[1]
                if value_name in resource_json:
                    value = resource_json[value_name]
                    if isinstance(value, List):
                        values = value
                        for value in values:
                            resource_instance.add_deferred_connection(  # type: ignore
                                attr,
                                value,
                                is_parent,  # type: ignore
                                edge_type=edge_type,
                            )
                    elif isinstance(value, str):
                        resource_instance.add_deferred_connection(  # type: ignore
                            attr,
                            value,
                            is_parent,
                            edge_type=edge_type  # type: ignore
                        )
                    else:
                        log.error(("Unable to add deferred connection for"
                                   f" value {value} of type {type(value)}"))

            def add_edge(search_map_key: str, is_parent: bool) -> None:
                srs = search_results[search_map_key]
                for sr in srs:
                    if is_parent:
                        src = sr
                        dst = resource_instance
                    else:
                        src = resource_instance
                        dst = sr
                    self.graph.add_edge(src, dst, edge_type=edge_type)

            for is_parent, edge_sr_names in parent_map.items():
                for edge_type, search_result_names in edge_sr_names.items():
                    for search_result_name in search_result_names:
                        if search_result_name in search_results:
                            add_edge(search_result_name, is_parent)
                        else:
                            if search_result_name in search_map:
                                add_deferred_connection(
                                    search_map, search_result_name, is_parent,
                                    edge_type)
                            else:
                                log.error(
                                    f"Key {search_result_name} is missing in search_map"
                                )

    @metrics_collect_intances.time()  # type: ignore
    def collect_instances(self) -> None:
        instances = self.client.list_droplets()

        def get_image(droplet: Json) -> Json:
            image = droplet["image"]
            image["region"] = droplet["region"]["slug"]
            return cast(Json, image)

        def remove_duplicates(images: List[Json]) -> List[Json]:
            seen_ids = set()
            unique_images = []
            for image in images:
                if image["id"] not in seen_ids:
                    unique_images.append(image)
                    seen_ids.add(image["id"])
            return unique_images

        images = [get_image(instance) for instance in instances]
        images = remove_duplicates(images)

        self.collect_resource(
            images,
            resource_class=DigitalOceanImage,
            attr_map={
                "id":
                lambda i: str(i["id"]),
                "urn":
                lambda i: image_id(i["id"]),
                "distribution":
                "distribution",
                "image_slug":
                "slug",
                "is_public":
                "public",
                "min_disk_size":
                "min_disk_size",
                "image_type":
                "type",
                "size_gigabytes":
                lambda image: int(math.ceil(image.get("size_gigabytes"))),
                "description":
                "description",
                "image_status":
                "status",
            },
            search_map={
                "_region": ["urn", lambda image: region_id(image["region"])],
                "__tags": [
                    "urn",
                    lambda image: list(
                        map(lambda tag: tag_id(tag),
                            image.get("tags", []) or [])),
                ],
            },
            predecessors={
                EdgeType.default: ["__tags"],
            },
        )
        self.collect_resource(
            instances,
            resource_class=DigitalOceanDroplet,
            attr_map={
                "id":
                lambda i: str(i["id"]),
                "urn":
                lambda d: droplet_id(d["id"]),
                "instance_status":
                "status",
                "instance_cores":
                "vcpus",
                "instance_memory":
                lambda d: d["memory"] / 1024.0,
                "droplet_backup_ids":
                lambda d: list(map(str,
                                   d.get("backup_ids", []) or [])),
                "is_locked":
                "locked",
                "droplet_features":
                "features",
                "droplet_image":
                lambda d: d["image"]["slug"],
            },
            search_map={
                "_region": [
                    "urn",
                    lambda droplet: region_id(droplet["region"]["slug"]),
                ],
                "__vpcs": ["urn", lambda droplet: vpc_id(droplet["vpc_uuid"])],
                "__images":
                ["urn", lambda droplet: image_id(droplet["image"]["id"])],
                "__tags": [
                    "urn",
                    lambda d: list(
                        map(lambda tag: tag_id(tag), d.get("tags", []))),
                ],
            },
            predecessors={
                EdgeType.default: ["__vpcs", "__images", "__tags"],
                EdgeType.delete: ["__vpcs"],
            },
        )

    @metrics_collect_regions.time()  # type: ignore
    def collect_regions(self) -> None:
        regions = self.client.list_regions()
        self.collect_resource(
            regions,
            resource_class=DigitalOceanRegion,
            attr_map={
                "id": "slug",
                "urn": lambda r: region_id(r["slug"]),
                "name": "name",
                "do_region_slug": "slug",
                "do_region_features": "features",
                "is_available": "available",
                "do_region_droplet_sizes": "sizes",
            },
        )

    @metrics_collect_volumes.time()  # type: ignore
    def collect_volumes(self) -> None:
        volumes = self.client.list_volumes()

        def extract_volume_status(volume: Json) -> str:
            in_use = len(volume.get("droplet_ids", []) or []) > 0
            return "in-use" if in_use else "available"

        self.collect_resource(
            volumes,
            resource_class=DigitalOceanVolume,
            attr_map={
                "id": "id",
                "urn": lambda r: volume_id(r["id"]),
                "volume_size": "size_gigabytes",
                "description": "description",
                "filesystem_type": "filesystem_type",
                "filesystem_label": "filesystem_label",
                "volume_status": extract_volume_status,
            },
            search_map={
                "__users": [
                    "urn",
                    lambda vol: list(
                        map(lambda id: droplet_id(id), vol["droplet_ids"])),
                ],
                "__tags": [
                    "urn",
                    lambda v: list(
                        map(lambda tag: tag_id(tag), v.get("tags", []))),
                ],
            },
            predecessors={EdgeType.default: ["__users", "__tags"]},
            successors={EdgeType.delete: ["__users"]},
        )

    @metrics_collect_databases.time()  # type: ignore
    def collect_databases(self) -> None:

        # This mapping was taken from the DigitalOcean web console.
        dbtype_to_size = {
            "db-s-1vcpu-1gb": 10,
            "db-s-1vcpu-2gb": 25,
            "db-s-2vcpu-4gb": 38,
            "db-s-4vcpu-8gb": 115,
            "db-s-6vcpu-16gb": 270,
            "db-s-8vcpu-32gb": 580,
            "db-s-16vcpu-64gb": 1012,
            "gd-2vcpu-8gb": 25,
            "gd-4vcpu-16gb": 60,
            "gd-8vcpu-32gb": 145,
            "gd-16vcpu-64gb": 325,
            "gd-32vcpu-128gb": 695,
            "gd-40vcpu-160gb": 875,
            "so1_5-2vcpu-16gb": 400,
            "so1_5-4vcpu-32gb": 845,
            "so1_5-8vcpu-64gb": 1680,
            "so1_5-16vcpu-128gb": 3410,
            "so1_5-24vcpu-192gb": 5140,
            "so1_5-32vcpu-256gb": 6860,
        }

        databases = self.client.list_databases()
        self.collect_resource(
            databases,
            resource_class=DigitalOceanDatabase,
            attr_map={
                "id":
                "id",
                "urn":
                lambda db: database_id(db["id"]),
                "name":
                lambda db: database_id(db["name"]),
                "db_type":
                "engine",
                "db_status":
                "status",
                "db_version":
                "version",
                "db_endpoint":
                lambda db: db.get("connection", {}).get("host", ""),
                "instance_type":
                "size",
                "volume_size":
                lambda db: dbtype_to_size.get(db.get("size", ""), 0),
            },
            search_map={
                "_region": ["urn", lambda db: region_id(db["region"])],
                "__vpcs":
                ["urn", lambda db: vpc_id(db["private_network_uuid"])],
                "__tags": [
                    "urn",
                    lambda db: list(
                        map(lambda tag: tag_id(tag),
                            db.get("tags", []) or [])),
                ],
            },
            predecessors={
                EdgeType.default: ["__vpcs", "__tags"],
                EdgeType.delete: ["__vpcs"],
            },
        )

    @metrics_collect_vpcs.time()  # type: ignore
    def collect_vpcs(self) -> None:
        vpcs = self.client.list_vpcs()
        self.collect_resource(
            vpcs,
            resource_class=DigitalOceanVPC,
            attr_map={
                "id": "id",
                "urn": "urn",
                "ip_range": "ip_range",
                "description": "description",
                "is_default": "default",
            },
            search_map={
                "_region": ["urn", lambda vpc: region_id(vpc["region"])],
            },
        )

    @metrics_collect_projects.time()  # type: ignore
    def collect_projects(self) -> None:
        def get_resource_id(resource: Json) -> str:
            return cast(str, resource["urn"])

        projects = self.client.list_projects()
        project_resources = [
            list(
                map(get_resource_id,
                    self.client.list_project_resources(p["id"])))
            for p in projects
        ]

        for project, resource_ids in zip(projects, project_resources):
            project["resource_ids"] = resource_ids

        self.collect_resource(
            projects,
            resource_class=DigitalOceanProject,
            attr_map={
                "id": "id",
                "urn": lambda p: project_id(p["id"]),
                "owner_uuid": "owner_uuid",
                "owner_id": lambda p: str(p["owner_id"]),
                "description": "description",
                "purpose": "purpose",
                "environment": "environment",
                "is_default": "is_default",
            },
            search_map={
                "__resources": ["urn", lambda p: p["resource_ids"]],
            },
            successors={
                EdgeType.default: ["__resources"],
                EdgeType.delete: ["__resources"],
            },
        )

    @metrics_collect_k8s_clusters.time()  # type: ignore
    def collect_k8s_clusters(self) -> None:
        clusters = self.client.list_kubernetes_clusters()
        self.collect_resource(
            clusters,
            resource_class=DigitalOceanKubernetesCluster,
            attr_map={
                "id": "id",
                "urn": lambda c: kubernetes_id(c["id"]),
                "k8s_version": "version",
                "k8s_cluster_subnet": "cluster_subnet",
                "k8s_service_subnet": "service_subnet",
                "ipv4_address": "ipv4",
                "endpoint": "endpoint",
                "auto_upgrade_enabled": "auto_upgrade",
                "cluster_status": lambda c: c["status"]["state"],
                "surge_upgrade_enabled": "surge_upgrade",
                "registry_enabled": "registry_enabled",
                "ha_enabled": "ha",
            },
            search_map={
                "_region": ["urn", lambda c: region_id(c["region"])],
                "__nodes": [
                    "urn",
                    lambda cluster: [
                        droplet_id(node["droplet_id"])
                        for node_pool in cluster["node_pools"]
                        for node in node_pool["nodes"]
                    ],
                ],
                "__vpcs": ["urn", lambda c: vpc_id(c["vpc_uuid"])],
            },
            successors={
                EdgeType.default: ["__nodes"],
                EdgeType.delete: ["__nodes"]
            },
            predecessors={
                EdgeType.default: ["__vpcs"],
                EdgeType.delete: ["__vpcs"]
            },
        )

    @metrics_collect_snapshots.time()  # type: ignore
    def collect_snapshots(self) -> None:
        def get_resource_id(snapshot: Json) -> str:
            if snapshot["resource_type"] == "droplet":
                return droplet_id(snapshot["resource_id"])
            else:
                return volume_id(snapshot["resource_id"])

        snapshots = self.client.list_snapshots()
        self.collect_resource(
            snapshots,
            resource_class=DigitalOceanSnapshot,
            attr_map={
                "id":
                lambda s: str(s["id"]),
                "urn":
                lambda s: snapshot_id(s["id"]),
                "volume_size":
                lambda vol: vol["min_disk_size"],
                "snapshot_size_gigabytes":
                lambda vol: int(math.ceil(vol.get("size_gigabytes"))),
                "resource_id":
                "resource_id",
                "resource_type":
                "resource_type",
            },
            search_map={
                "_region": [
                    "urn",
                    lambda s: [region_id(region) for region in s["regions"]],
                ],
                "__resource": ["urn", lambda s: get_resource_id(s)],
                "__tags": [
                    "urn",
                    lambda s: list(
                        map(lambda tag: tag_id(tag),
                            s.get("tags", []) or [])),
                ],
            },
            predecessors={EdgeType.default: ["__resource", "__tags"]},
        )

    @metrics_collect_load_balancers.time()  # type: ignore
    def collect_load_balancers(self) -> None:
        loadbalancers = self.client.list_load_balancers()

        def get_nr_nodes(lb: Json) -> int:
            size_to_nr_nodes = {
                "lb-small": 1,
                "lb-medium": 3,
                "lb-large": 3,
            }
            if lb["size_unit"]:
                return cast(int, lb["size_unit"])
            else:
                return size_to_nr_nodes.get(lb["size"], 1)

        self.collect_resource(
            loadbalancers,
            resource_class=DigitalOceanLoadBalancer,
            attr_map={
                "id":
                "id",
                "urn":
                lambda lb: loadbalancer_id(lb["id"]),
                "public_ip_address":
                "ip",
                "nr_nodes":
                get_nr_nodes,
                "loadbalancer_status":
                "status",
                "redirect_http_to_https":
                "redirect_http_to_https",
                "enable_proxy_protocol":
                "enable_proxy_protocol",
                "enable_backend_keepalive":
                "enable_backend_keepalive",
                "disable_lets_encrypt_dns_records":
                "disable_lets_encrypt_dns_records",
            },
            search_map={
                "_region": ["urn", lambda lb: region_id(lb["region"]["slug"])],
                "__vpcs": ["urn", lambda lb: vpc_id(lb["vpc_uuid"])],
                "__droplets": [
                    "urn",
                    lambda lb: list(
                        map(lambda id: droplet_id(id),
                            lb.get("droplet_ids", []) or [])),
                ],
            },
            predecessors={
                EdgeType.default: ["__vpcs"],
                EdgeType.delete: ["__vpcs"]
            },
            successors={EdgeType.default: ["__droplets"]},
        )

    @metrics_collect_floating_ips.time()  # type: ignore
    def collect_floating_ips(self) -> None:
        floating_ips = self.client.list_floating_ips()
        self.collect_resource(
            floating_ips,
            resource_class=DigitalOceanFloatingIP,
            attr_map={
                "id": "ip",
                "urn": lambda ip: floatingip_id(ip["ip"]),
                "ip_address": "ip",
                "ip_address_family": lambda ip: "ipv4",
                "is_locked": "locked",
            },
            search_map={
                "_region": ["urn", lambda ip: region_id(ip["region"]["slug"])],
                "__droplet": [
                    "urn",
                    lambda ip: droplet_id(ip.get("droplet", {}).get("id", "")),
                ],
            },
            predecessors={EdgeType.default: ["__droplet"]},
        )

    @metrics_collect_spaces.time()  # type: ignore
    def collect_spaces(self, region: DigitalOceanRegion) -> None:
        spaces = self.client.list_spaces(region.do_region_slug or "")
        self.collect_resource(
            spaces,
            resource_class=DigitalOceanSpace,
            attr_map={
                "id": "Name",
                "urn": lambda space: space_id(space["Name"]),
                "name": "Name",
                "ctime": "CreationDate",
            },
            search_map={
                "_region": [
                    "urn",
                    lambda space: region_id(region.do_region_slug or ""),
                ],
            },
        )

    @metrics_collect_apps.time()  # type: ignore
    def collect_apps(self) -> None:
        apps = self.client.list_apps()

        def extract_region(app: Json) -> Optional[str]:
            region_slug = next(
                iter(app.get("region", {}).get("data_centers", [])), None)
            if region_slug is None:
                return None
            return region_id(region_slug)

        def extract_databases(app: Json) -> List[str]:
            databases = app.get("spec", {}).get("databases", [])
            names = [database_id(database["name"]) for database in databases]
            return names

        self.collect_resource(
            apps,
            resource_class=DigitalOceanApp,
            attr_map={
                "id": "id",
                "urn": lambda app: app_id(app["id"]),
                "tier_slug": "tier_slug",
                "default_ingress": "default_ingress",
                "live_url": "live_url",
                "live_url_base": "live_url_base",
                "live_domain": "live_domain",
            },
            search_map={
                "_region": ["urn", extract_region],
                "__databases": ["name", extract_databases],
            },
            predecessors={EdgeType.default: ["__databases"]},
        )

    @metrics_collect_cdn_endpoints.time()  # type: ignore
    def collect_cdn_endpoints(self) -> None:
        endpoints = self.client.list_cdn_endpoints()
        self.collect_resource(
            endpoints,
            resource_class=DigitalOceanCdnEndpoint,
            attr_map={
                "id": "id",
                "urn": lambda endpoint: cdn_endpoint_id(endpoint["id"]),
                "origin": "origin",
                "endpoint": "endpoint",
                "certificate_id": "certificate_id",
                "custom_domain": "custom_domain",
                "ttl": "ttl",
            },
        )

    @metrics_collect_certificates.time()  # type: ignore
    def collect_certificates(self) -> None:
        certificates = self.client.list_certificates()
        self.collect_resource(
            certificates,
            resource_class=DigitalOceanCertificate,
            attr_map={
                "id": "id",
                "urn": lambda c: certificate_id(c["id"]),
                "expires": lambda c: iso2datetime(c.get("not_after")),
                "sha1_fingerprint": "sha1_fingerprint",
                "dns_names": "dns_names",
                "certificate_state": "state",
                "certificate_type": "type",
            },
        )

    @metrics_collect_container_registry.time()  # type: ignore
    def collect_container_registry(self) -> None:
        registries = self.client.get_registry_info()
        for registry in registries:
            registry["updated_at"] = registry["storage_usage_updated_at"]
            self.collect_resource(
                [registry],
                resource_class=DigitalOceanContainerRegistry,
                attr_map={
                    "id": "name",
                    "urn": lambda r: container_registry_id(r["name"]),
                    "storage_usage_bytes": "storage_usage_bytes",
                    "is_read_only": "read_only",
                },
                search_map={
                    "_region":
                    ["urn", lambda registry: region_id(registry["region"])],
                },
            )
            repositories = self.client.list_registry_repositories(
                registry["name"])
            self.collect_resource(
                repositories,
                resource_class=DigitalOceanContainerRegistryRepository,
                attr_map={
                    "id":
                    "name",
                    "urn":
                    lambda r: container_registry_repository_id(
                        r["registry_name"], r["name"]),
                    "name":
                    "name",
                    "tag_count":
                    "tag_count",
                    "manifest_count":
                    "manifest_count",
                },
                search_map={
                    "__registry": [
                        "urn",
                        lambda r: container_registry_id(r["registry_name"]),
                    ],
                },
                predecessors={EdgeType.default: ["__registry"]},
            )

            tags = [
                tag for repository in repositories
                for tag in self.client.list_registry_repository_tags(
                    registry["name"], repository["name"])
            ]

            self.collect_resource(
                tags,
                resource_class=DigitalOceanContainerRegistryRepositoryTag,
                attr_map={
                    "id":
                    "tag",
                    "urn":
                    lambda t: container_registry_repository_tag_id(
                        t["registry_name"], t["repository"], t["tag"]),
                    "registry_name":
                    "registry_name",
                    "repository_name":
                    "repository",
                    "name":
                    "tag",
                    "manifest_digest":
                    "manifest_digest",
                    "compressed_size_bytes":
                    "compressed_size_bytes",
                    "size_bytes":
                    "size_bytes",
                },
                search_map={
                    "__repository": [
                        "urn",
                        lambda t: container_registry_repository_id(
                            t["registry_name"], t["repository"]),
                    ],
                    "__registry": [
                        "urn",
                        lambda t: container_registry_id(t["registry_name"]),
                    ],
                },
                predecessors={
                    EdgeType.default: ["__repository", "__registry"]
                },
            )

    @metrics_collect_ssh_keys.time()  # type: ignore
    def collect_ssh_keys(self) -> None:
        ssh_keys = self.client.list_ssh_keys()
        self.collect_resource(
            ssh_keys,
            resource_class=DigitalOceanSSHKey,
            attr_map={
                "id": lambda k: str(k["id"]),
                "urn": lambda k: ssh_key_id(k["id"]),
                "public_key": "public_key",
                "fingerprint": "fingerprint",
            },
        )

    @metrics_collect_tags.time()  # type: ignore
    def collect_tags(self) -> None:
        tags = self.client.list_tags()
        self.collect_resource(
            tags,
            resource_class=DigitalOceanTag,
            attr_map={
                "id": "name",
                "urn": lambda t: tag_id(t["name"]),
            },
        )

    @metrics_collect_domains.time()  # type: ignore
    def collect_domains(self) -> None:
        domains = self.client.list_domains()
        self.collect_resource(
            domains,
            resource_class=DigitalOceanDomain,
            attr_map={
                "id": "name",
                "urn": lambda d: domain_id(d["name"]),
                "ttl": "ttl",
                "zone_file": "zone_file",
            },
        )

        def update_record(record: Json, domain: Json) -> Json:
            record["domain_name"] = domain["name"]
            return record

        domain_records = [
            update_record(record, domain) for domain in domains
            for record in self.client.list_domain_records(domain["name"])
        ]
        self.collect_resource(
            domain_records,
            resource_class=DigitalOceanDomainRecord,
            attr_map={
                "id": lambda r: str(r["id"]),
                "name": "name",
                "urn": lambda r: domain_record_id(r["id"]),
                "domain_name": "domain_name",
                "record_type": "type",
                "record_data": "data",
                "record_priority": "priority",
                "record_port": "port",
                "record_ttl": "ttl",
                "record_weight": "weight",
                "record_flags": "flags",
                "record_tag": "tag",
            },
            search_map={
                "__domain": ["urn", lambda r: domain_id(r["domain_name"])],
            },
            predecessors={EdgeType.default: ["__domain"]},
        )

    @metrics_collect_firewalls.time()  # type: ignore
    def collect_firewalls(self) -> None:
        firewalls = self.client.list_firewalls()
        self.collect_resource(
            firewalls,
            resource_class=DigitalOceanFirewall,
            attr_map={
                "id": "id",
                "urn": lambda f: firewall_id(f["id"]),
                "firewall_status": "status",
            },
            search_map={
                "__droplets": [
                    "urn",
                    lambda f: list(
                        map(lambda id: droplet_id(id),
                            f.get("droplet_ids", []) or [])),
                ],
                "__tags": [
                    "urn",
                    lambda f: list(
                        map(lambda id: tag_id(id),
                            f.get("tags", []) or [])),
                ],
            },
            predecessors={
                EdgeType.default: ["__tags"],
            },
            successors={
                EdgeType.default: ["__droplets"],
            },
        )

    @metrics_collect_alert_policies.time()  # type: ignore
    def collect_alert_policies(self) -> None:
        alert_policies = self.client.list_alert_policies()
        self.collect_resource(
            alert_policies,
            resource_class=DigitalOceanAlertPolicy,
            attr_map={
                "id": "uuid",
                "urn": lambda ap: alert_policy_id(ap["uuid"]),
                "description": "description",
                "policy_type": "type",
                "is_enabled": "enabled",
            },
        )
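The attr_map entries above mix plain string keys and callables; a hedged sketch of how a selector is presumably resolved against a result dict (the real get_result_data() is not shown in these examples):

from typing import Any, Callable, Dict, Optional, Union

Selector = Union[str, Callable[[Dict[str, Any]], Any]]


def resolve_selector(result: Dict[str, Any], selector: Selector) -> Optional[Any]:
    # string selectors index into the API result, callables compute the value
    return selector(result) if callable(selector) else result.get(selector)


droplet = {"id": 123, "status": "active"}
print(resolve_selector(droplet, "status"))                 # active
print(resolve_selector(droplet, lambda d: str(d["id"])))   # 123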
Example #20
    def __init__(self) -> None:
        super().__init__()
        self.name = str(self.cloud)
        cloud = Cloud(self.cloud)
        self.root = cloud
        self.graph = Graph(root=self.root)