Example #1
def collect_plugin_graph(collector_plugin: Type[BaseCollectorPlugin],
                         args=None) -> Optional[Graph]:
    collector: BaseCollectorPlugin = collector_plugin()
    collector_name = f"collector_{collector.cloud}"
    resotolib.signal.set_thread_name(collector_name)

    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker")

    log.debug(f"Starting new collect process for {collector.cloud}")
    start_time = time()
    collector.start()
    collector.join(ArgumentParser.args.timeout)
    elapsed = time() - start_time
    if not collector.is_alive():  # The plugin has finished its work
        if not collector.finished:
            log.error(f"Plugin {collector.cloud} did not finish collection"
                      " - ignoring plugin results")
            return None
        if not collector.graph.is_dag_per_edge_type():
            log.error(f"Graph of plugin {collector.cloud} is not acyclic"
                      " - ignoring plugin results")
            return None
        log.info(
            f"Collector of plugin {collector.cloud} finished in {elapsed:.4f}s"
        )
        return collector.graph
    else:
        log.error(
            f"Plugin {collector.cloud} timed out - discarding Plugin graph")
        return None
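A hypothetical usage sketch for the function above: fanning it out over all collector plugins with a process pool and merging the per-plugin graphs. The collect_all() helper is made up for illustration and assumes the plugin classes are picklable; graph.merge() is the same helper used in Example #12.

from concurrent import futures
from functools import partial

def collect_all(plugin_classes, graph):
    # Run one collect process per plugin class; failed or timed-out
    # plugins return None and are skipped.
    with futures.ProcessPoolExecutor(max_workers=len(plugin_classes)) as pool:
        for plugin_graph in pool.map(
            partial(collect_plugin_graph, args=ArgumentParser.args),
            plugin_classes,
        ):
            if plugin_graph is not None:
                graph.merge(plugin_graph)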
Example #2
    def __delitem__(self, key):
        if self.parent_resource and isinstance(self.parent_resource,
                                               BaseResource):
            log.debug(f"Calling parent resource to delete tag {key} in cloud")
            try:
                if self.parent_resource.delete_tag(key):
                    log_msg = f"Successfully deleted tag {key} in cloud"
                    self.parent_resource._changes.add("tags")
                    self.parent_resource.log(log_msg)
                    log.info((f"{log_msg} for {self.parent_resource.kind}"
                              f" {self.parent_resource.id}"))
                    return super().__delitem__(key)
                else:
                    log_msg = f"Error deleting tag {key} in cloud"
                    self.parent_resource.log(log_msg)
                    log.error((f"{log_msg} for {self.parent_resource.kind}"
                               f" {self.parent_resource.id}"))
            except Exception as e:
                log_msg = (
                    f"Unhandled exception while trying to delete tag {key} in cloud:"
                    f" {type(e)} {e}")
                self.parent_resource.log(log_msg, exception=e)
                if self.parent_resource._raise_tags_exceptions:
                    raise
                else:
                    log.exception(log_msg)
        else:
            return super().__delitem__(key)
Example #3
    def do_action(self, data: Dict) -> None:
        log.info("Protect Snowflakes called")
        self.config.read()

        cg = CoreGraph()
        resource_parts = []
        for cloud_id, accounts in self.config.items():
            for account_id, regions in accounts.items():
                for region_id, kinds in regions.items():
                    for kind, resources in kinds.items():
                        for resource_id in resources:
                            log.debug(
                                f"Protecting {resource_id} of kind {kind} in"
                                f" region {region_id} account {account_id}"
                                f" cloud {cloud_id}")
                            resource_parts.append(
                                f'(reported.id == "{resource_id}"'
                                f' and reported.kind == "{kind}"'
                                f' and metadata.ancestors.region.id == "{region_id}"'
                                f' and metadata.ancestors.account.id == "{account_id}"'
                                f' and metadata.ancestors.cloud.id == "{cloud_id}")'
                            )
        if not resource_parts:
            log.debug("No resources configured for protection - nothing to do")
            return
        resource_part = " or ".join(resource_parts)
        command = f"query {resource_part} | protect"
        for node_data in cg.execute(command):
            node = node_from_dict(node_data)
            log.debug(f"Protected {node.rtdname}")
Example #4
def update_model(graph: Graph,
                 resotocore_base_uri: str,
                 dump_json: bool = False,
                 tempdir: Optional[str] = None) -> None:
    model_uri = f"{resotocore_base_uri}/model"

    log.debug(f"Updating model via {model_uri}")

    model_json = json.dumps(graph.export_model(), indent=4)

    if dump_json:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        with tempfile.NamedTemporaryFile(
                prefix=f"resoto-model-{ts}-",
                suffix=".json",
                delete=False,  # dump_json is true in this branch, so keep the file
                dir=tempdir,
        ) as model_outfile:
            log.info(f"Writing model json to file {model_outfile.name}")
            model_outfile.write(model_json.encode())

    headers = {
        "Content-Type": "application/json",
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)

    r = requests.patch(model_uri, data=model_json, headers=headers)
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to create model: {r.content}")
Example #5
def get_org_accounts(filter_current_account=False):
    session = aws_session()
    client = session.client("organizations")
    accounts = []
    try:
        response = client.list_accounts()
        accounts = response.get("Accounts", [])
        while response.get("NextToken") is not None:
            response = client.list_accounts(NextToken=response["NextToken"])
            accounts.extend(response.get("Accounts", []))
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "AccessDeniedException":
            log.error(
                "AWS error - missing permissions to list organization accounts"
            )
        else:
            raise
    filter_account_id = current_account_id() if filter_current_account else -1
    accounts = [
        aws_account["Id"] for aws_account in accounts
        if aws_account["Id"] != filter_account_id
    ]
    for account in accounts:
        log.debug(f"AWS found org account {account}")
    log.info(f"AWS found a total of {len(accounts)} org accounts")
    return accounts
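For comparison, a minimal sketch of the same listing built on boto3's paginator support, which follows NextToken continuation transparently. The get_org_account_ids() name is made up; aws_session() is the helper used above.

def get_org_account_ids():
    client = aws_session().client("organizations")
    paginator = client.get_paginator("list_accounts")
    account_ids = []
    for page in paginator.paginate():
        # Each page is one list_accounts response dict.
        account_ids.extend(account["Id"] for account in page.get("Accounts", []))
    return account_ids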
Example #6
    def run(self) -> None:
        self.name = self.identifier
        add_event_listener(EventType.SHUTDOWN, self.shutdown)
        while not self.shutdown_event.is_set():
            log.info("Connecting to resotocore message bus")
            try:
                self.connect()
            except Exception as e:
                log.error(e)
            time.sleep(10)
Example #7
    def pre_cleanup(self, graph=None) -> bool:
        if not hasattr(self, "pre_delete"):
            return True

        if graph is None:
            graph = self._graph

        if self.phantom:
            raise RuntimeError(
                f"Can't cleanup phantom resource {self.rtdname}")

        if self.cleaned:
            log.debug(f"Resource {self.rtdname} has already been cleaned up")
            return True

        account = self.account(graph)
        region = self.region(graph)
        if not isinstance(account, BaseAccount) or not isinstance(
                region, BaseRegion):
            log.error(
                ("Could not determine account or region for pre cleanup of"
                 f" {self.rtdname}"))
            return False

        log_suffix = f" in account {account.dname} region {region.name}"
        self.log("Trying to run pre clean up")
        log.debug(f"Trying to run pre clean up {self.rtdname}{log_suffix}")
        try:
            if not getattr(self, "pre_delete")(graph):
                self.log("Failed to run pre clean up")
                log.error(
                    f"Failed to run pre clean up {self.rtdname}{log_suffix}")
                return False
            self.log("Successfully ran pre clean up")
            log.info(
                f"Successfully ran pre clean up {self.rtdname}{log_suffix}")
        except Exception as e:
            self.log("An error occurred during pre clean up", exception=e)
            log.exception(
                f"An error occurred during pre clean up {self.rtdname}{log_suffix}"
            )
            cloud = self.cloud(graph)
            metrics_resource_pre_cleanup_exceptions.labels(
                cloud=cloud.name,
                account=account.dname,
                region=region.name,
                kind=self.kind,
            ).inc()
            return False
        return True
Example #8
    def run(self) -> None:
        self.name = self.identifier
        add_event_listener(EventType.SHUTDOWN, self.shutdown)

        for i in range(self.max_workers):
            threading.Thread(
                target=self.worker, daemon=True, name=f"worker-{i}"
            ).start()

        while not self.shutdown_event.is_set():
            log.info("Connecting to resotocore task queue")
            try:
                self.connect()
            except Exception as e:
                log.error(e)
            time.sleep(10)
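The worker threads started above presumably drain a shared task queue. A minimal sketch of such a loop, assuming self.queue is a queue.Queue and on_task() is a hypothetical handler (this is not the actual CoreTasks implementation):

    def worker(self) -> None:
        while not self.shutdown_event.is_set():
            try:
                # The timeout lets the thread notice shutdown_event periodically.
                task = self.queue.get(timeout=1)
            except queue.Empty:  # requires `import queue`
                continue
            try:
                self.on_task(task)  # hypothetical task handler
            except Exception:
                log.exception(f"Error while processing task {task}")
            finally:
                self.queue.task_done()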
Example #9
    def volumes_cleanup(self, graph: Graph):
        log.info("Volume Cleanup called")
        for node in graph.nodes:
            if (isinstance(node, BaseVolume)
                    and node.volume_status == "available"
                    and node.age > self.age and node.last_access is not None
                    and node.last_update is not None
                    and node.last_access > self.age
                    and node.last_update > self.age):
                cloud = node.cloud(graph)
                account = node.account(graph)
                region = node.region(graph)
                log.debug((
                    f"Found available volume {node.dname} in cloud {cloud.name} account {account.dname} "
                    f"region {region.name} with age {node.age}. Last update was {node.last_update} ago "
                    f"and last access {node.last_access} ago, both of which are longer than {self.age} "
                    f"- setting to be cleaned"))
                node.clean = True
Example #10
    def cleanup(self) -> None:
        if not ArgumentParser.args.cleanup:
            log.error(
                ("Cleanup called but --cleanup flag not provided at startup"
                 " - ignoring call"))
            return

        log.info("Running cleanup")
        # create a subgraph of all the nodes that have a delete edge
        delete_graph = DiGraph(self.graph.edge_type_subgraph(EdgeType.delete))
        # from that graph delete all the nodes not marked for cleanup
        for node in list(delete_graph.nodes):
            if not node.clean:
                delete_graph.remove_node(node)
        # add all the nodes that are supposed to be cleaned
        # but do not have a delete edge so weren't part of the
        # subgraph
        for node in self.graph.nodes:
            if node.clean and node not in delete_graph:
                delete_graph.add_node(node)
        cleanup_nodes = list(delete_graph.nodes)

        for node in cleanup_nodes:
            log.debug(f"Adding {node.rtdname} to cleanup plan")

        log.debug(f"Sending {len(cleanup_nodes)} nodes to pre-cleanup pool")
        with ThreadPoolExecutor(
                max_workers=ArgumentParser.args.cleanup_pool_size,
                thread_name_prefix="pre_cleaner",
        ) as executor:
            executor.map(self.pre_clean, cleanup_nodes)

        log.debug(f"Running parallel cleanup on {len(cleanup_nodes)} nodes")
        parallel_pass_num = 1
        for nodes in dependent_node_iterator(delete_graph):
            log.debug(
                f"Cleaning {len(nodes)} nodes in {ordinal(parallel_pass_num)} pass"
            )
            with ThreadPoolExecutor(
                    max_workers=ArgumentParser.args.cleanup_pool_size,
                    thread_name_prefix="cleaner",
            ) as executor:
                executor.map(self.clean, nodes)
            parallel_pass_num += 1
Example #11
    def alarm_cleanup(self, graph: Graph):
        log.info("AWS Cloudwatch Alarms cleanup called")
        for node in graph.nodes:
            if node.protected or not isinstance(node, AWSCloudwatchAlarm):
                continue

            cloud = node.cloud(graph)
            account = node.account(graph)
            region = node.region(graph)
            log_prefix = (
                f"Found {node.rtdname} in cloud {cloud.name} account {account.dname} "
                f"region {region.name}.")

            if len(self.config) > 0:
                if (cloud.id not in self.config
                        or account.id not in self.config[cloud.id]):
                    log.debug((
                        f"{log_prefix} Account not found in config - ignoring."
                    ))
                    continue

            should_clean = False
            i = None
            log_msg = log_prefix
            for dimension in node.dimensions:
                if dimension.get("Name") == "InstanceId":
                    instance_id = dimension.get("Value")
                    i = graph.search_first_all({
                        "kind": "aws_ec2_instance",
                        "id": instance_id
                    })
                    if (isinstance(i, AWSEC2Instance)
                            and i.instance_status != "terminated"):
                        should_clean = False
                        break
                    else:
                        should_clean = True
                        log_msg += f" Referenced EC2 instance {instance_id} not found."

            if not should_clean:
                continue
            log.debug(f"{log_msg} - cleaning alarm")
            node.clean = True
Example #12
    def collect(self) -> None:
        """This method is being called by resoto whenever the collector runs

        It is responsible for querying the cloud APIs for remote resources and adding
        them to the plugin graph.
        The graph root (self.graph.root) must always be followed by one or more
        accounts. An account must always be followed by a region.
        A region can contain arbitrary resources.
        """
        tokens = ArgumentParser.args.digitalocean_api_tokens
        spaces_access_keys: List[
            str
        ] = ArgumentParser.args.digitalocean_spaces_access_keys
        spaces_keys: List[Tuple[Optional[str], Optional[str]]] = []

        def spaces_keys_valid(keys: List[str]) -> bool:
            return all(len(key.split(":")) == 2 for key in keys)

        if not spaces_keys_valid(spaces_access_keys):
            log.warning(
                "DigitalOcean Spaces access keys must be provided in pairs of access_key:secret_key"
            )
        else:

            def key_to_tuple(key: str) -> Tuple[str, str]:
                access_key, secret_key = key.split(":")
                return access_key, secret_key

            spaces_keys = [key_to_tuple(key) for key in spaces_access_keys]

        if len(tokens) != len(spaces_access_keys):
            log.warning(
                "The number of DigitalOcean API tokens and DigitalOcean Spaces access keys must be equal."
                " Missing or extra spaces access keys will be ignored."
            )
            spaces_keys = spaces_keys[: len(tokens)]
            spaces_keys.extend([(None, None)] * (len(tokens) - len(spaces_keys)))

        log.info(f"plugin: collecting DigitalOcean resources for {len(tokens)} teams")
        for token, space_key_tuple in zip(tokens, spaces_keys):
            client = StreamingWrapper(token, space_key_tuple[0], space_key_tuple[1])
            team_graph = self.collect_team(client)
            self.graph.merge(team_graph)
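To make the docstring's hierarchy concrete, a minimal collect() skeleton might look as follows. ExampleAccount, ExampleRegion, ExampleResource and the api_list_resources() call are placeholders; the sketch assumes resotolib's Graph.add_resource(parent, node) helper, which adds a node together with an edge from its parent.

    def collect(self) -> None:
        # Required shape: graph root -> account -> region -> resources.
        account = ExampleAccount("some_account", {})
        self.graph.add_resource(self.graph.root, account)

        region = ExampleRegion("some_region", {})
        self.graph.add_resource(account, region)

        for raw in api_list_resources():  # placeholder API call
            resource = ExampleResource(raw["id"], raw.get("tags", {}))
            self.graph.add_resource(region, resource)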
Example #13
def cleanup():
    """Run resource cleanup"""

    log.info("Running cleanup")

    cg = CoreGraph()

    query_filter = ""
    if ArgumentParser.args.collector and len(
            ArgumentParser.args.collector) > 0:
        clouds = '["' + '", "'.join(ArgumentParser.args.collector) + '"]'
        query_filter = f"and /ancestors.cloud.reported.id in {clouds} "
    query = (
        f"/desired.clean == true and /metadata.cleaned != true"
        f" and /metadata.protected!=true {query_filter}<-default,delete[0:]->")

    graph = cg.graph(query)
    cleaner = Cleaner(graph)
    cleaner.cleanup()
    cg.patch_nodes(graph)
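For illustration, assuming resoto was started with --collector aws gcp, the assembled query reads:

/desired.clean == true and /metadata.cleaned != true and /metadata.protected!=true and /ancestors.cloud.reported.id in ["aws", "gcp"] <-default,delete[0:]->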
Example #14
class GraphExportIterator:
    def __init__(self, graph: Graph, delete_tempfile: bool = True, tempdir: Optional[str] = None):
        self.graph = graph
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        self.tempfile = tempfile.NamedTemporaryFile(
            prefix=f"resoto-graph-{ts}-",
            suffix=".ndjson",
            delete=delete_tempfile,
            dir=tempdir,
        )
        if not delete_tempfile:
            log.info(f"Writing graph json to file {self.tempfile.name}")
        self.graph_merge_kind = BaseCloud
        gmk = getattr(ArgumentParser.args, "graph_merge_kind", "cloud")
        if gmk == "account":
            self.graph_merge_kind = BaseAccount
        self.graph_exported = False
        self.export_lock = threading.Lock()
        self.total_lines = 0
        self.number_of_nodes = int(graph.number_of_nodes())
        self.number_of_edges = int(graph.number_of_edges())

    def __del__(self):
        try:
            self.tempfile.close()
        except Exception:
            pass

    def __iter__(self):
        if not self.graph_exported:
            self.export_graph()
        start_time = time()
        last_sent = time()
        lines_sent = 0
        percent = 0
        report_every = round(self.total_lines / 10)

        while line := self.tempfile.readline():
            lines_sent += 1
            if report_every > 0 and lines_sent > 0 and lines_sent % report_every == 0:
                percent = round(lines_sent / self.total_lines * 100)
                elapsed = time() - last_sent
                log.debug(
                    f"Sent {lines_sent}/{self.total_lines} nodes and edges ({percent}%) - {elapsed:.4f}s"
                )
                last_sent = time()
            yield line
        self.tempfile.seek(0)
        elapsed = time() - start_time
        log.info(
            f"Sent {lines_sent}/{self.total_lines},"
            f" {self.number_of_nodes} nodes and {self.number_of_edges} edges"
            f" in {elapsed:.4f}s"
        )
Example #15
def shutdown(event: Event) -> None:
    reason = event.data.get("reason")
    emergency = event.data.get("emergency")

    if emergency:
        resotolib.signal.emergency_shutdown(reason)

    current_pid = os.getpid()
    if current_pid != resotolib.signal.parent_pid:
        return

    if reason is None:
        reason = "unknown reason"
    log.info(
        (
            f"Received shut down event {event.event_type}:"
            f" {reason} - killing all threads and child processes"
        )
    )
    shutdown_event.set()  # and then end the program
Example #16
    def export_graph(self):
        with self.export_lock:
            start_time = time()
            for node in self.graph.nodes:
                node_dict = node_to_dict(node)
                if isinstance(node, self.graph_merge_kind):
                    log.debug(f"Replacing sub graph below {node.rtdname}")
                    if "metadata" not in node_dict or not isinstance(
                        node_dict["metadata"], dict
                    ):
                        node_dict["metadata"] = {}
                    node_dict["metadata"]["replace"] = True
                node_json = json.dumps(node_dict) + "\n"
                self.tempfile.write(node_json.encode())
                self.total_lines += 1
            elapsed_nodes = time() - start_time
            log.debug(f"Exported {self.number_of_nodes} nodes in {elapsed_nodes:.4f}s")
            start_time = time()
            for edge in self.graph.edges:
                from_node = edge[0]
                to_node = edge[1]
                if not isinstance(from_node, BaseResource) or not isinstance(
                    to_node, BaseResource
                ):
                    log.error(f"One of {from_node} and {to_node} is not a BaseResource")
                    continue
                edge_dict = {"from": from_node.chksum, "to": to_node.chksum}
                if len(edge) == 3:
                    key = edge[2]
                    if isinstance(key, EdgeKey) and key.edge_type != EdgeType.default:
                        edge_dict["edge_type"] = key.edge_type.value
                edge_json = json.dumps(edge_dict) + "\n"
                self.tempfile.write(edge_json.encode())
                self.total_lines += 1
            elapsed_edges = time() - start_time
            log.debug(f"Exported {self.number_of_edges} edges in {elapsed_edges:.4f}s")
            elapsed = elapsed_nodes + elapsed_edges
            log.info(f"Exported {self.total_lines} nodes and edges in {elapsed:.4f}s")
            self.graph_exported = True
            del self.graph
            self.tempfile.seek(0)
Example #17
def send_to_resotocore(graph: Graph):
    if not ArgumentParser.args.resotocore_uri:
        return

    log.info("resotocore Event Handler called")

    base_uri = ArgumentParser.args.resotocore_uri.strip("/")
    resotocore_graph = ArgumentParser.args.resotocore_graph
    dump_json = ArgumentParser.args.debug_dump_json
    tempdir = ArgumentParser.args.tempdir

    create_graph(base_uri, resotocore_graph)
    update_model(graph, base_uri, dump_json=dump_json, tempdir=tempdir)

    graph_export_iterator = GraphExportIterator(graph,
                                                delete_tempfile=not dump_json,
                                                tempdir=tempdir)
    # Drop the local reference; export_graph() deletes its own copy, so the
    # graph can be garbage collected once it has been written out.
    del graph
    graph_export_iterator.export_graph()
    send_graph(graph_export_iterator, base_uri, resotocore_graph)
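send_graph() itself is not shown here. A plausible sketch, mirroring the update_model() pattern from Example #4, relies on requests streaming any iterable as a chunked request body, so the exported ndjson is uploaded without being read into memory. The merge endpoint path and content type are assumptions:

def send_graph(graph_export_iterator, base_uri, graph_name):
    headers = {"Content-Type": "application/x-ndjson"}
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)
    r = requests.post(
        f"{base_uri}/graph/{graph_name}/merge",  # hypothetical endpoint
        data=graph_export_iterator,  # iterable body -> chunked transfer
        headers=headers,
    )
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to send graph: {r.content}")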
Example #18
def core_actions_processor(
    collectors: List[BaseCollectorPlugin], message: Dict
) -> Optional[Dict]:
    if not isinstance(message, dict):
        log.error(f"Invalid message: {message}")
        return
    kind = message.get("kind")
    message_type = message.get("message_type")
    data = message.get("data")
    log.debug(f"Received message of kind {kind}, type {message_type}, data: {data}")
    if kind == "action":
        try:
            if message_type == "collect":
                start_time = time.time()
                collect_and_send(collectors)
                run_time = int(time.time() - start_time)
                log.info(f"Collect ran for {run_time} seconds")
            elif message_type == "cleanup":
                start_time = time.time()
                cleanup()
                run_time = int(time.time() - start_time)
                log.info(f"Cleanup ran for {run_time} seconds")
            else:
                raise ValueError(f"Unknown message type {message_type}")
        except Exception as e:
            log.exception(f"Failed to {message_type}: {e}")
            reply_kind = "action_error"
        else:
            reply_kind = "action_done"

        reply_message = {
            "kind": reply_kind,
            "message_type": message_type,
            "data": data,
        }
        return reply_message
Example #19
    def loadbalancer_cleanup(self, graph: Graph):
        log.info("AWS Loadbalancers Cleanup called")
        for node in graph.nodes:
            if not isinstance(node, (AWSELB, AWSALB, AWSALBTargetGroup)):
                continue

            if node.age < self.age:
                continue

            if node.tags.get("expiration") == "never":
                continue

            cloud = node.cloud(graph)
            account = node.account(graph)
            region = node.region(graph)

            if (isinstance(node, AWSELB) and len([
                    i for i in node.predecessors(graph,
                                                 edge_type=EdgeType.delete)
                    if isinstance(i, AWSEC2Instance)
                    and i.instance_status != "terminated"
            ]) == 0 and len(node.backends) == 0):
                log.debug((
                    f"Found orphaned AWS ELB {node.dname} in cloud {cloud.name} account {account.dname} "
                    f"region {region.name} with age {node.age} and no EC2 instances attached to it."
                ))
                node.clean = True
            elif (isinstance(node, AWSALB) and len([
                    n for n in node.predecessors(graph,
                                                 edge_type=EdgeType.delete)
                    if isinstance(n, AWSALBTargetGroup)
            ]) == 0 and len(node.backends) == 0):
                log.debug((
                    f"Found orphaned AWS ALB {node.dname} in cloud {cloud.name} account {account.dname} "
                    f"region {region.name} with age {node.age} and no Target Groups attached to it."
                ))
                node.clean = True
            elif (isinstance(node, AWSALBTargetGroup) and len(
                    list(node.successors(graph, edge_type=EdgeType.delete)))
                  == 0):
                log.debug((
                    f"Found orphaned AWS ALB Target Group {node.dname} in cloud {cloud.name} "
                    f"account {account.dname} region {region.name} with age {node.age}"
                ))
                node.clean = True
            elif isinstance(node, AWSALB):
                cleanup_alb = True
                target_groups = [
                    n for n in node.predecessors(graph,
                                                 edge_type=EdgeType.delete)
                    if isinstance(n, AWSALBTargetGroup)
                ]

                if len(node.backends) > 0:
                    cleanup_alb = False

                for tg in target_groups:
                    if (tg.target_type != "instance" or tg.age < self.age
                            or len([
                                i for i in tg.predecessors(
                                    graph, edge_type=EdgeType.delete)
                                if isinstance(i, AWSEC2Instance)
                                and i.instance_status != "terminated"
                            ]) > 0):
                        cleanup_alb = False

                if cleanup_alb:
                    log.debug((
                        f"Found AWS ALB {node.dname} in cloud {cloud.name} account {account.dname} "
                        f"region {region.name} with age {node.age} and no EC2 instances attached "
                        f"to its {len(target_groups)} target groups."))
                    for tg in target_groups:
                        tg.clean = True
                    node.clean = True
Example #20
    def vpc_cleanup(self, graph: Graph):
        log.info("AWS VPC cleanup called")
        for node in graph.nodes:
            if node.protected or not node.clean or not isinstance(node, AWSVPC):
                continue

            cloud = node.cloud(graph)
            account = node.account(graph)
            region = node.region(graph)
            log_prefix = (
                f"Found AWS VPC {node.dname} in cloud {cloud.name} account {account.dname} "
                f"region {region.name} marked for cleanup."
            )

            if len(self.config) > 0:
                if (
                    cloud.id not in self.config
                    or account.id not in self.config[cloud.id]
                ):
                    log.debug(
                        (
                            f"{log_prefix} Account not found in config - ignoring dependent resources."
                        )
                    )
                    continue

            vpc_instances = [
                i
                for i in node.descendants(graph, edge_type=EdgeType.delete)
                if isinstance(i, AWSEC2Instance)
                and i.instance_status not in ("shutting-down", "terminated")
                and not i.clean
            ]
            if len(vpc_instances) > 0:
                log_msg = "VPC contains active EC2 instances - not cleaning VPC."
                log.debug(f"{log_prefix} {log_msg}")
                node.log(log_msg)
                node.clean = False
                continue

            log.debug(f"{log_prefix} Marking dependent resources for cleanup as well.")

            for descendant in node.descendants(graph, edge_type=EdgeType.delete):
                log.debug(f"Found descendant {descendant.rtdname} of VPC {node.dname}")
                if isinstance(
                    descendant,
                    (
                        AWSVPCPeeringConnection,
                        AWSEC2NetworkAcl,
                        AWSEC2NetworkInterface,
                        AWSELB,
                        AWSALB,
                        AWSALBTargetGroup,
                        AWSEC2Subnet,
                        AWSEC2SecurityGroup,
                        AWSEC2InternetGateway,
                        AWSEC2NATGateway,
                        AWSEC2RouteTable,
                        AWSVPCEndpoint,
                        AWSEC2ElasticIP,
                    ),
                ):
                    descendant.log(
                        (
                            f"Marking for cleanup because resource is a descendant of VPC {node.dname} "
                            f"which is set to be cleaned"
                        )
                    )
                    node.log(
                        f"Marking {descendant.rtdname} for cleanup because resource is a descendant"
                    )
                    descendant.clean = True
                else:
                    if descendant.clean:
                        log.debug(
                            (
                                f"Descendant {descendant.rtdname} of VPC {node.dname} is not targeted but "
                                f"already marked for cleaning"
                            )
                        )
                    else:
                        log.error(
                            (
                                f"Descendant {descendant.rtdname} of VPC {node.dname} is not targeted and "
                                f"not marked for cleaning - VPC cleanup will likely fail"
                            )
                        )
                        node.log(
                            (
                                f"Descendant {descendant.rtdname} is not targeted and not marked for cleaning "
                                f"- cleanup will likely fail"
                            )
                        )
Example #21
    def do_action(self, data: Dict) -> None:
        log.info("Tag Validator called")
        self.config.read()

        cg = CoreGraph()

        query_tag = "tagvalidate"
        exclusion_part = "metadata.protected == false and metadata.phantom == false and metadata.cleaned == false"
        tags_part = "has_key(reported.tags, expiration)"
        kinds_part = 'reported.kind in ["' + '", "'.join(
            self.config["kinds"]) + '"]'
        account_parts = []
        for cloud_id, account in self.config["accounts"].items():
            for account_id in account.keys():
                account_part = (
                    f'(metadata.ancestors.cloud.id == "{cloud_id}" and '
                    f'metadata.ancestors.account.id == "{account_id}")')
                account_parts.append(account_part)
        accounts_part = "(" + " or ".join(account_parts) + ")"
        query = f"{exclusion_part} and {kinds_part} and {tags_part} and {accounts_part} #{query_tag} <-[0:]-"

        graph = cg.graph(query)
        commands = []
        for node in graph.nodes:
            cloud = node.cloud(graph)
            account = node.account(graph)
            region = node.region(graph)
            if node.protected or node._resotocore_query_tag != query_tag:
                continue
            update_node_tag = False
            max_expiration = (self.config["accounts"].get(cloud.id, {}).get(
                account.id, {}).get("expiration"))
            max_expiration_str = delta_to_str(max_expiration)
            node_expiration_str = node.tags.get("expiration")
            try:
                node_expiration = parse_delta(node_expiration_str)
            except (AssertionError, ValueError):
                log_msg = (
                    f"Invalid expiration tag value {node_expiration_str}"
                    f" - updating tag to {max_expiration_str}")
                node.log(log_msg)
                log.error(f"{log_msg} on {node.rtdname} in {cloud.rtdname}"
                          f" {account.rtdname} {region.rtdname}")
                update_node_tag = True
            else:
                if max_expiration < node_expiration:
                    log_msg = (
                        f"Current expiration tag value {node_expiration_str} is larger"
                        f" than {max_expiration_str} - updating tag")
                    node.log(log_msg)
                    log.error(f"{log_msg} on {node.rtdname}")
                    update_node_tag = True
            if update_node_tag:
                commands.append(
                    f"query id({node._resotocore_id}) | tag update --nowait expiration {max_expiration_str}"
                )
        cg.patch_nodes(graph)
        for command in commands:
            if ArgumentParser.args.tagvalidator_dry_run:
                log.debug(f"Tag validator dry run - not executing: {command}")
                continue
            for response in cg.execute(command):
                log.debug(f"Response: {response}")
Example #22
def main() -> None:
    setup_logger("resotoworker")
    # Try to run in a new process group and
    # ignore if not possible for whatever reason
    try:
        os.setpgid(0, 0)
    except Exception:
        pass

    resotolib.signal.parent_pid = os.getpid()

    # Add cli args
    # The cli args are parsed twice so that when a user specifies
    # e.g. `--collector aws --help` they are only shown the cli args
    # of the selected collectors, not those of others like gcp.
    collector_arg_parser = ArgumentParser(
        description="resoto worker",
        env_args_prefix="RESOTOWORKER_",
        add_help=False,
        add_machine_help=False,
    )
    PluginLoader.add_args(collector_arg_parser)
    (args, _) = collector_arg_parser.parse_known_args()
    ArgumentParser.args = args

    arg_parser = ArgumentParser(
        description="resoto worker",
        env_args_prefix="RESOTOWORKER_",
    )
    jwt_add_args(arg_parser)
    logging_add_args(arg_parser)
    graph_add_args(arg_parser)
    collect_add_args(arg_parser)
    cleanup_add_args(arg_parser)
    core_add_args(arg_parser)
    resotocore_add_args(arg_parser)
    CoreActions.add_args(arg_parser)
    WebApp.add_args(arg_parser)
    PluginLoader.add_args(arg_parser)
    event_add_args(arg_parser)
    add_args(arg_parser)

    # Find resoto Plugins in the resoto.plugins module
    plugin_loader = PluginLoader()
    plugin_loader.add_plugin_args(arg_parser)

    # At this point the CLI, all Plugins as well as the WebServer have
    # added their args to the arg parser
    arg_parser.parse_args()

    # Handle Ctrl+c and other means of termination/shutdown
    resotolib.signal.initializer()
    add_event_listener(EventType.SHUTDOWN, shutdown, blocking=False)

    # Try to increase nofile and nproc limits
    increase_limits()

    web_server = WebServer(WebApp())
    web_server.daemon = True
    web_server.start()

    core_actions = CoreActions(
        identifier=f"{ArgumentParser.args.resotocore_subscriber_id}-collect_cleanup",
        resotocore_uri=ArgumentParser.args.resotocore_uri,
        resotocore_ws_uri=ArgumentParser.args.resotocore_ws_uri,
        actions={
            "collect": {
                "timeout": ArgumentParser.args.timeout,
                "wait_for_completion": True,
            },
            "cleanup": {
                "timeout": ArgumentParser.args.timeout,
                "wait_for_completion": True,
            },
        },
        message_processor=partial(
            core_actions_processor, plugin_loader.plugins(PluginType.COLLECTOR)
        ),
    )

    task_queue_filter = {}
    if ArgumentParser.args.collector and len(ArgumentParser.args.collector) > 0:
        task_queue_filter = {"cloud": list(ArgumentParser.args.collector)}
    core_tasks = CoreTasks(
        identifier="workerd-tasks",
        resotocore_ws_uri=ArgumentParser.args.resotocore_ws_uri,
        tasks=["tag"],
        task_queue_filter=task_queue_filter,
        message_processor=core_tag_tasks_processor,
    )
    core_actions.start()
    core_tasks.start()

    for Plugin in plugin_loader.plugins(PluginType.ACTION):
        try:
            log.debug(f"Starting action plugin {Plugin}")
            plugin = Plugin()
            plugin.start()
        except Exception as e:
            log.exception(f"Caught unhandled persistent Plugin exception {e}")

    # We wait for the shutdown Event to be set() and then end the program
    shutdown_event.wait()
    web_server.shutdown()
    time.sleep(1)  # everything gets 1000ms to shutdown gracefully before we force it
    resotolib.signal.kill_children(resotolib.signal.SIGTERM, ensure_death=True)
    log.info("Shutdown complete")
    os._exit(0)
Example #23
def handler(sig, frame) -> None:
    log.info("Shutting down")
    shutdown_event.set()
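The (sig, frame) signature is what Python's signal module expects, so registering the handler (sketch) is simply:

import signal

signal.signal(signal.SIGINT, handler)
signal.signal(signal.SIGTERM, handler)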