Example #1
def send_graph(
    graph_export_iterator: GraphExportIterator,
    resotocore_base_uri: str,
    resotocore_graph: str,
):
    merge_uri = f"{resotocore_base_uri}/graph/{resotocore_graph}/merge"

    log.debug(f"Sending graph via {merge_uri}")

    headers = {
        "Content-Type": "application/x-ndjson",
        "Resoto-Worker-Nodes": str(graph_export_iterator.number_of_nodes),
        "Resoto-Worker-Edges": str(graph_export_iterator.number_of_edges),
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)

    r = requests.post(
        merge_uri,
        data=graph_export_iterator,
        headers=headers,
    )
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to send graph: {r.content}")
    log.debug(f"resotocore reply: {r.content.decode()}")
    log.debug(f"Sent {graph_export_iterator.total_lines} items to resotocore")
Example #2
def core_actions_processor(metrics: Metrics, query_uri: str,
                           message: dict) -> Optional[dict]:
    if not isinstance(message, dict):
        log.error(f"Invalid message: {message}")
        return
    kind = message.get("kind")
    message_type = message.get("message_type")
    data = message.get("data")
    log.debug(
        f"Received message of kind {kind}, type {message_type}, data: {data}")
    if kind == "action":
        try:
            if message_type == "generate_metrics":
                start_time = time.time()
                update_metrics(metrics, query_uri)
                run_time = time.time() - start_time
                log.debug(f"Updated metrics for {run_time:.2f} seconds")
            else:
                raise ValueError(f"Unknown message type {message_type}")
        except Exception as e:
            log.exception(f"Failed to {message_type}: {e}")
            reply_kind = "action_error"
        else:
            reply_kind = "action_done"

        reply_message = {
            "kind": reply_kind,
            "message_type": message_type,
            "data": data,
        }
        return reply_message
Example #3
def get_org_accounts(filter_current_account=False):
    session = aws_session()
    client = session.client("organizations")
    accounts = []
    try:
        response = client.list_accounts()
        accounts = response.get("Accounts", [])
        while response.get("NextToken") is not None:
            response = client.list_accounts(NextToken=response["NextToken"])
            accounts.extend(response.get("Accounts", []))
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "AccessDeniedException":
            log.error(
                "AWS error - missing permissions to list organization accounts"
            )
        else:
            raise
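    # -1 is a sentinel that never matches an account Id, so no account is filtered out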
    filter_account_id = current_account_id() if filter_current_account else -1
    accounts = [
        aws_account["Id"] for aws_account in accounts
        if aws_account["Id"] != filter_account_id
    ]
    for account in accounts:
        log.debug(f"AWS found org account {account}")
    log.info(f"AWS found a total of {len(accounts)} org accounts")
    return accounts
Example #4
 def __delitem__(self, key):
     if self.parent_resource and isinstance(self.parent_resource,
                                            BaseResource):
         log.debug(f"Calling parent resource to delete tag {key} in cloud")
         try:
             if self.parent_resource.delete_tag(key):
                 log_msg = f"Successfully deleted tag {key} in cloud"
                 self.parent_resource._changes.add("tags")
                 self.parent_resource.log(log_msg)
                 log.info((f"{log_msg} for {self.parent_resource.kind}"
                           f" {self.parent_resource.id}"))
                 return super().__delitem__(key)
             else:
                 log_msg = f"Error deleting tag {key} in cloud"
                 self.parent_resource.log(log_msg)
                 log.error((f"{log_msg} for {self.parent_resource.kind}"
                            f" {self.parent_resource.id}"))
         except Exception as e:
             log_msg = (
                 f"Unhandled exception while trying to delete tag {key} in cloud:"
                 f" {type(e)} {e}")
             self.parent_resource.log(log_msg, exception=e)
             if self.parent_resource._raise_tags_exceptions:
                 raise
             else:
                 log.exception(log_msg)
     else:
         return super().__delitem__(key)
Example #5
    def graph(self, query: str) -> Graph:
        def process_data_line(data: dict, graph: Graph):
            """Process a single line of resotocore graph data"""

            if data.get("type") == "node":
                node_id = data.get("id")
                node = node_from_dict(data)
                node_mapping[node_id] = node
                log.debug(f"Adding node {node} to the graph")
                graph.add_node(node)
                if node.kind == "graph_root":
                    log.debug(f"Setting graph root {node}")
                    graph.root = node
            elif data.get("type") == "edge":
                node_from = data.get("from")
                node_to = data.get("to")
                edge_type = EdgeType.from_value(data.get("edge_type"))
                if node_from not in node_mapping or node_to not in node_mapping:
                    raise ValueError(f"One of {node_from} -> {node_to} unknown")
                graph.add_edge(
                    node_mapping[node_from], node_mapping[node_to], edge_type=edge_type
                )

        graph = Graph()
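        # node_mapping resolves resotocore node ids to Graph node objects when edges are processed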
        node_mapping = {}
        for data in self.query(query):
            try:
                process_data_line(data, graph)
            except ValueError as e:
                log.error(e)
                continue
        sanitize(graph)
        return graph
Example #6
def add_event_listener(
    event_type: EventType,
    listener: Callable,
    blocking: bool = False,
    timeout: int = None,
    one_shot: bool = False,
) -> bool:
    """Add an Event Listener"""
    if not callable(listener):
        log.error(
            f"Error registering {listener} of type {type(listener)} with event"
            f" {event_type.name}")
        return False

    if timeout is None:
        if hasattr(ArgumentParser.args, "event_timeout"):
            timeout = ArgumentParser.args.event_timeout
        else:
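            # Fall back to a 15 minute default if no event timeout was configured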
            timeout = 900

    log.debug(f"Registering {listener} with event {event_type.name}"
              f" (blocking: {blocking}, one-shot: {one_shot})")
    with _events_lock.write_access:
        if not event_listener_registered(event_type, listener):
            _events[event_type][listener] = {
                "blocking": blocking,
                "timeout": timeout,
                "one-shot": one_shot,
                "lock": Lock(),
                "pid": os.getpid(),
            }
            return True
        return False
Example #7
def update_model(graph: Graph,
                 resotocore_base_uri: str,
                 dump_json: bool = False,
                 tempdir: str = None) -> None:
    model_uri = f"{resotocore_base_uri}/model"

    log.debug(f"Updating model via {model_uri}")

    model_json = json.dumps(graph.export_model(), indent=4)

    if dump_json:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        with tempfile.NamedTemporaryFile(
                prefix=f"resoto-model-{ts}-",
                suffix=".json",
                delete=not dump_json,
                dir=tempdir,
        ) as model_outfile:
            log.info(f"Writing model json to file {model_outfile.name}")
            model_outfile.write(model_json.encode())

    headers = {
        "Content-Type": "application/json",
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)

    r = requests.patch(model_uri, data=model_json, headers=headers)
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to create model: {r.content}")
Example #8
def dispatch_event(event: Event, blocking: bool = False) -> None:
    """Dispatch an Event"""
    waiting_str = "" if blocking else "not "
    log.debug(
        f"Dispatching event {event.event_type.name} and {waiting_str}waiting for"
        " listeners to return")

    if event.event_type not in _events.keys():
        return

    with _events_lock.read_access:
        # Event listeners might unregister themselves during event dispatch
        # so we will work on a shallow copy while processing the current event.
        listeners = dict(_events[event.event_type])

    threads = {}
    for listener, listener_data in listeners.items():
        try:
            if listener_data["pid"] != os.getpid():
                continue

            if listener_data["one-shot"] and not listener_data["lock"].acquire(
                    blocking=False):
                log.error(f"Not calling one-shot listener {listener} of type"
                          f" {type(listener)} - can't acquire lock")
                continue

            log.debug(f"Calling listener {listener} of type {type(listener)}"
                      f" (blocking: {listener_data['blocking']})")
            thread_name = (f"{event.event_type.name.lower()}_event"
                           f"-{getattr(listener, '__name__', 'anonymous')}")
            t = Thread(target=listener, args=[event], name=thread_name)
            if blocking or listener_data["blocking"]:
                threads[t] = listener
            t.start()
        except Exception:
            log.exception("Caught unhandled event callback exception")
        finally:
            if listener_data["one-shot"]:
                log.debug(
                    f"One-shot specified for event {event.event_type.name} "
                    f"listener {listener} - removing event listener")
                remove_event_listener(event.event_type, listener)
                listener_data["lock"].release()

    start_time = time.time()
    for thread, listener in threads.items():
        timeout = start_time + listeners[listener]["timeout"] - time.time()
        if timeout < 1:
            timeout = 1
        log.debug(
            f"Waiting up to {timeout:.2f}s for event listener {thread.name} to finish"
        )
        thread.join(timeout)
        log.debug(
            f"Event listener {thread.name} finished (timeout: {thread.is_alive()})"
        )
Example #9
    def collect(self) -> None:
        log.debug("plugin: AWS collecting resources")
        if not self.authenticated:
            log.error("Failed to authenticate - skipping collection")
            return

        if (ArgumentParser.args.aws_assume_current
                and not ArgumentParser.args.aws_dont_scrape_current):
            log.warning(
                "You specified --aws-assume-current but not --aws-dont-scrape-current! "
                "This will result in the same account being scraped twice and is likely not what you want."
            )

        if ArgumentParser.args.aws_role and ArgumentParser.args.aws_scrape_org:
            accounts = [
                AWSAccount(aws_account_id, {}, role=ArgumentParser.args.aws_role)
                for aws_account_id in get_org_accounts(
                    filter_current_account=not ArgumentParser.args.aws_assume_current
                )
                if aws_account_id not in ArgumentParser.args.aws_scrape_exclude_account
            ]
            if not ArgumentParser.args.aws_dont_scrape_current:
                accounts.append(AWSAccount(current_account_id(), {}))
        elif ArgumentParser.args.aws_role and ArgumentParser.args.aws_account:
            accounts = [
                AWSAccount(aws_account_id, {},
                           role=ArgumentParser.args.aws_role)
                for aws_account_id in ArgumentParser.args.aws_account
            ]
        else:
            accounts = [AWSAccount(current_account_id(), {})]

        max_workers = min(len(accounts), ArgumentParser.args.aws_account_pool_size)
        pool_args = {"max_workers": max_workers}
        if ArgumentParser.args.aws_fork:
            pool_args["mp_context"] = multiprocessing.get_context("spawn")
            pool_args["initializer"] = resotolib.signal.initializer
            pool_executor = futures.ProcessPoolExecutor
        else:
            pool_executor = futures.ThreadPoolExecutor

        with pool_executor(**pool_args) as executor:
            wait_for = [
                executor.submit(collect_account, account, self.regions,
                                ArgumentParser.args) for account in accounts
            ]
            for future in futures.as_completed(wait_for):
                account_graph = future.result()
                if not isinstance(account_graph, Graph):
                    log.error(
                        f"Returned account graph has invalid type {type(account_graph)}"
                    )
                    continue
                self.graph.merge(account_graph)
Example #10
def force_shutdown(delay: int = 10) -> None:
    time.sleep(delay)
    log_stats()
    log.error(
        (
            "Some child process or thread timed out during shutdown"
            " - forcing shutdown completion"
        )
    )
    os._exit(0)
Example #11
 def run(self) -> None:
     self.name = self.identifier
     add_event_listener(EventType.SHUTDOWN, self.shutdown)
     while not self.shutdown_event.is_set():
         log.info("Connecting to resotocore message bus")
         try:
             self.connect()
         except Exception as e:
             log.error(e)
         time.sleep(10)
Example #12
def update_metrics(metrics: Metrics, query_uri: str) -> None:
    metrics_descriptions = find_metrics()
    for _, data in metrics_descriptions.items():
        if shutdown_event.is_set():
            return

        metrics_query = data.get("query")
        metric_type = data.get("type")
        metric_help = data.get("help", "")

        if metrics_query is None:
            continue

        if metric_type not in ("gauge", "counter"):
            log.error(
                f"Do not know how to handle metrics of type {metric_type}")
            continue

        try:
            for result in query(metrics_query, query_uri):
                labels = get_labels_from_result(result)
                label_values = get_label_values_from_result(result, labels)

                for metric_name, metric_value in get_metrics_from_result(
                        result).items():
                    if metric_name not in metrics.staging:
                        log.debug(
                            f"Adding metric {metric_name} of type {metric_type}"
                        )
                        if metric_type == "gauge":
                            metrics.staging[metric_name] = GaugeMetricFamily(
                                f"resoto_{metric_name}",
                                metric_help,
                                labels=labels,
                            )
                        elif metric_type == "counter":
                            metrics.staging[metric_name] = CounterMetricFamily(
                                f"resoto_{metric_name}",
                                metric_help,
                                labels=labels,
                            )
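                    # Counters are cumulative: carry over the previous live value for the same label set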
                    if metric_type == "counter" and metric_name in metrics.live:
                        current_metric = metrics.live[metric_name]
                        for sample in current_metric.samples:
                            if sample.labels == result.get("group"):
                                metric_value += sample.value
                                break
                    metrics.staging[metric_name].add_metric(
                        label_values, metric_value)
        except RuntimeError as e:
            log.error(e)
            continue
    metrics.swap()
Example #13
def validate_graph_dataclasses_and_nodes(graph: Graph) -> None:
    log.debug("Validating attribute types of all graph dataclasses")
    node_chksums = {}
    for node in graph.nodes:
        if isinstance(node, BaseResource):
            validate_dataclass(node)
            if node.chksum not in node_chksums:
                node_chksums[node.chksum] = node
            else:
                log.error(
                    f"Duplicate checksum {node.chksum} for node {node.rtdname} in graph"
                )
Example #14
 def shutdown(self, event: Event = None) -> None:
     log.debug(
         "Received shutdown event - shutting down resotocore message bus listener"
     )
     self.shutdown_event.set()
     for core_action in self.actions.keys():
         try:
             self.unregister(core_action)
         except Exception as e:
             log.error(e)
     if self.ws:
         self.ws.close()
Example #15
    def read(self) -> bool:
        if not self.config_file:
            log.error(
                "Attribute config_file is not set on TagValidatorConfig() instance"
            )
            return False

        with open(self.config_file) as config_file:
            config = yaml.load(config_file, Loader=yaml.FullLoader)
        if self.validate(config):
            self.update(config)
        return True
Example #16
    def collect(self) -> None:
        """Run by resoto during the global collect() run.

        This method kicks off code that adds GCP resources to `self.graph`.
        When collect() finishes the parent thread will take `self.graph` and merge
        it with the global production graph.
        """
        log.debug("plugin: GCP collecting resources")

        credentials = Credentials.all()
        if len(ArgumentParser.args.gcp_project) > 0:
            for project in list(credentials.keys()):
                if project not in ArgumentParser.args.gcp_project:
                    del credentials[project]

        if len(credentials) == 0:
            return

        max_workers = min(len(credentials), ArgumentParser.args.gcp_project_pool_size)
        pool_args = {"max_workers": max_workers}
        if ArgumentParser.args.gcp_fork:
            pool_args["mp_context"] = multiprocessing.get_context("spawn")
            pool_args["initializer"] = resotolib.signal.initializer
            pool_executor = futures.ProcessPoolExecutor
            collect_args = {
                "args": ArgumentParser.args,
                "credentials": (
                    credentials
                    if all(v is None for v in credentials.values())
                    else None
                ),
            }
        else:
            pool_executor = futures.ThreadPoolExecutor
            collect_args = {}

        with pool_executor(**pool_args) as executor:
            wait_for = [
                executor.submit(
                    self.collect_project,
                    project_id,
                    **collect_args,
                ) for project_id in credentials.keys()
            ]
            for future in futures.as_completed(wait_for):
                project_graph = future.result()
                if not isinstance(project_graph, Graph):
                    log.error(
                        f"Skipping invalid project_graph {type(project_graph)}"
                    )
                    continue
                self.graph.merge(project_graph)
Example #17
    def patch_nodes(self, graph: Graph):
        headers = {"Content-Type": "application/x-ndjson"}
        if getattr(ArgumentParser.args, "psk", None):
            encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)

        r = requests.patch(
            f"{self.graph_uri}/nodes", data=GraphChangeIterator(graph), headers=headers
        )
        if r.status_code != 200:
            err = r.content.decode("utf-8")
            log.error(err)
            raise RuntimeError(f"Failed to patch nodes: {err}")
Example #18
 def delete(
     self,
     graph: Graph,
     snapshot_before_delete: bool = False,
     snapshot_timeout: int = 3600,
 ) -> bool:
     ec2 = aws_resource(self, "ec2", graph)
     volume = ec2.Volume(self.id)
     if snapshot_before_delete or self.snapshot_before_delete:
         log_msg = "Creating snapshot before deletion"
         self.log(log_msg)
         log.debug(f"{log_msg} of {self.kind} {self.dname}")
         snapshot = volume.create_snapshot(
             Description=f"resoto created snapshot for volume {self.id}",
             TagSpecifications=[
                 {
                     "ResourceType": "snapshot",
                     "Tags": [
                         {"Key": "Name", "Value": f"CK snap of {self.id}"},
                         {"Key": "owner", "Value": "resoto"},
                     ],
                 },
             ],
         )
         start_utime = time.time()
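         # Poll every 10 seconds until the snapshot leaves the pending state or the timeout elapses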
         while snapshot.state == "pending":
             if time.time() > start_utime + snapshot_timeout:
                 raise TimeoutError(
                     (
                         f"AWS EC2 Volume Snapshot {self.dname} tag update timed out after "
                         f"{snapshot_timeout} seconds with status {snapshot.state} ({snapshot.state_message})"
                     )
                 )
             time.sleep(10)
             log.debug(
                 (
                     f"Waiting for snapshot {snapshot.id} to finish before deletion of "
                     f"{self.kind} {self.dname} - progress {snapshot.progress}"
                 )
             )
             snapshot = ec2.Snapshot(snapshot.id)
         if snapshot.state != "completed":
             log_msg = f"Failed to create snapshot - status {snapshot.state} ({snapshot.state_message})"
             self.log(log_msg)
             log.error(
                 (
                     f"{log_msg} for {self.kind} {self.dname} in "
                     f"account {self.account(graph).dname} region {self.region(graph).name}"
                 )
             )
             return False
     volume.delete()
     return True
Example #19
 def wrapper(self, *args, **kwargs):
     if not isinstance(self, BaseResource):
         raise ValueError(
             "unless_protected() only supports BaseResource type objects")
     if self.protected:
         log.error(
             f"Resource {self.rtdname} is protected - refusing modification"
         )
         self.log(
             ("Modification was requested even though resource is protected"
              " - refusing"))
         return False
     return f(self, *args, **kwargs)
Example #20
    def pre_cleanup(self, graph=None) -> bool:
        if not hasattr(self, "pre_delete"):
            return True

        if graph is None:
            graph = self._graph

        if self.phantom:
            raise RuntimeError(
                f"Can't cleanup phantom resource {self.rtdname}")

        if self.cleaned:
            log.debug(f"Resource {self.rtdname} has already been cleaned up")
            return True

        account = self.account(graph)
        region = self.region(graph)
        if not isinstance(account, BaseAccount) or not isinstance(
                region, BaseRegion):
            log.error(
                ("Could not determine account or region for pre cleanup of"
                 f" {self.rtdname}"))
            return False

        log_suffix = f" in account {account.dname} region {region.name}"
        self.log("Trying to run pre clean up")
        log.debug(f"Trying to run pre clean up {self.rtdname}{log_suffix}")
        try:
            if not getattr(self, "pre_delete")(graph):
                self.log("Failed to run pre clean up")
                log.error(
                    f"Failed to run pre clean up {self.rtdname}{log_suffix}")
                return False
            self.log("Successfully ran pre clean up")
            log.info(
                f"Successfully ran pre clean up {self.rtdname}{log_suffix}")
        except Exception as e:
            self.log("An error occurred during pre clean up", exception=e)
            log.exception(
                f"An error occurred during pre clean up {self.rtdname}{log_suffix}"
            )
            cloud = self.cloud(graph)
            metrics_resource_pre_cleanup_exceptions.labels(
                cloud=cloud.name,
                account=account.dname,
                region=region.name,
                kind=self.kind,
            ).inc()
            return False
        return True
Example #21
def create_graph(resotocore_base_uri: str, resotocore_graph: str):
    graph_uri = f"{resotocore_base_uri}/graph/{resotocore_graph}"

    log.debug(f"Creating graph {resotocore_graph} via {graph_uri}")

    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)
    r = requests.post(graph_uri, data="", headers=headers)
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to create graph: {r.content}")
Example #22
    def add_edge(
        self,
        src: BaseResource,
        dst: BaseResource,
        key: EdgeKey = None,
        edge_type: EdgeType = None,
        **attr,
    ):
        if src is None or dst is None:
            log.error(f"Not creating edge from or to NoneType: {src} to {dst}")
            return

        if edge_type is None:
            edge_type = EdgeType.default
        if key is None:
            key = EdgeKey(src=src, dst=dst, edge_type=edge_type)

        if self.has_edge(src, dst, key=key):
            log.error(f"Edge from {src} to {dst} already exists in graph")
            return
        return_key = super().add_edge(src, dst, key=key, **attr)
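        # Notify both resources about the new edge; exceptions in their hooks are logged but do not abort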
        if (
            self._log_edge_creation
            and isinstance(src, BaseResource)
            and isinstance(dst, BaseResource)
        ):
            log.debug(
                f"Added edge from {src.rtdname} to {dst.rtdname} (type: {edge_type.value})"
            )
            try:
                src.successor_added(dst, self)
            except Exception:
                log.exception(
                    (
                        f"Unhandled exception while telling {src.rtdname}"
                        f" that {dst.rtdname} was added as a successor"
                    )
                )
            try:
                dst.predecessor_added(src, self)
            except Exception:
                log.exception(
                    (
                        f"Unhandled exception while telling {dst.rtdname}"
                        f" that {src.rtdname} was added as a predecessor"
                    )
                )
        return return_key
Example #23
 def post(uri, data, headers):
     if getattr(ArgumentParser.args, "psk", None):
         encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)
     r = requests.post(uri, data=data, headers=headers, stream=True)
     if r.status_code != 200:
         log.error(r.content.decode())
         raise RuntimeError(f"Failed to query graph: {r.content.decode()}")
     for line in r.iter_lines():
         if not line:
             continue
         try:
             data = json.loads(line.decode("utf-8"))
             yield data
         except (TypeError, json.JSONDecodeError) as e:
             log.error(e)
             continue
Example #24
    def run(self) -> None:
        self.name = self.identifier
        add_event_listener(EventType.SHUTDOWN, self.shutdown)

        for i in range(self.max_workers):
            threading.Thread(
                target=self.worker, daemon=True, name=f"worker-{i}"
            ).start()

        while not self.shutdown_event.is_set():
            log.info("Connecting to resotocore task queue")
            try:
                self.connect()
            except Exception as e:
                log.error(e)
            time.sleep(10)
Example #25
 def get(self) -> Dict:
     changes = {}
     for section in ("reported", "desired", "metadata"):
         for attribute in getattr(self, section, []):
             if section not in changes:
                 changes[section] = {}
             try:
                 changes[section][attribute] = getattr(self.node, attribute)
             except AttributeError:
                 log.error(
                     f"Resource {self.node.rtdname} has no attribute {attribute}"
                 )
     if len(self.node.event_log) > 0:
         if "metadata" not in changes:
             changes["metadata"] = {}
         changes["metadata"]["event_log"] = self.node.str_event_log
     return changes
Example #26
def increase_limits() -> None:
    if sys.platform != "linux":
        return
    for limit_name in ("RLIMIT_NOFILE", "RLIMIT_NPROC"):
        soft_limit, hard_limit = resource.getrlimit(
            getattr(resource, limit_name))
        log.debug(
            f"Current {limit_name} soft: {soft_limit} hard: {hard_limit}")
        try:
            if soft_limit < hard_limit:
                log.debug(
                    f"Increasing {limit_name} {soft_limit} -> {hard_limit}")
                resource.setrlimit(getattr(resource, limit_name),
                                   (hard_limit, hard_limit))
        except ValueError:
            log.error(
                f"Failed to increase {limit_name} {soft_limit} -> {hard_limit}"
            )
Example #27
    def cleanup(self) -> None:
        if not ArgumentParser.args.cleanup:
            log.error(
                ("Cleanup called but --cleanup flag not provided at startup"
                 " - ignoring call"))
            return

        log.info("Running cleanup")
        # create a subgraph of all the nodes that have a delete edge
        delete_graph = DiGraph(self.graph.edge_type_subgraph(EdgeType.delete))
        # from that graph delete all the nodes not marked for cleanup
        for node in list(delete_graph.nodes):
            if not node.clean:
                delete_graph.remove_node(node)
        # add all the nodes that are supposed to be cleaned
        # but do not have a delete edge so weren't part of the
        # subgraph
        for node in self.graph.nodes:
            if node.clean and node not in delete_graph:
                delete_graph.add_node(node)
        cleanup_nodes = list(delete_graph.nodes)

        for node in cleanup_nodes:
            log.debug(f"Adding {node.rtdname} to cleanup plan")

        log.debug(f"Sending {len(cleanup_nodes)} nodes to pre-cleanup pool")
        with ThreadPoolExecutor(
                max_workers=ArgumentParser.args.cleanup_pool_size,
                thread_name_prefix="pre_cleaner",
        ) as executor:
            executor.map(self.pre_clean, cleanup_nodes)

        log.debug(f"Running parallel cleanup on {len(cleanup_nodes)} nodes")
        parallel_pass_num = 1
        for nodes in dependent_node_iterator(delete_graph):
            log.debug(
                f"Cleaning {len(nodes)} nodes in {ordinal(parallel_pass_num)} pass"
            )
            with ThreadPoolExecutor(
                    max_workers=ArgumentParser.args.cleanup_pool_size,
                    thread_name_prefix="cleaner",
            ) as executor:
                executor.map(self.clean, nodes)
            parallel_pass_num += 1
Example #28
 def export_graph(self):
     with self.export_lock:
         start_time = time()
         for node in self.graph.nodes:
             node_dict = node_to_dict(node)
             if isinstance(node, self.graph_merge_kind):
                 log.debug(f"Replacing sub graph below {node.rtdname}")
                 if "metadata" not in node_dict or not isinstance(
                     node_dict["metadata"], dict
                 ):
                     node_dict["metadata"] = {}
                 node_dict["metadata"]["replace"] = True
             node_json = json.dumps(node_dict) + "\n"
             self.tempfile.write(node_json.encode())
             self.total_lines += 1
         elapsed_nodes = time() - start_time
         log.debug(f"Exported {self.number_of_nodes} nodes in {elapsed_nodes:.4f}s")
         start_time = time()
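         # Edges are written as ndjson lines that reference the node checksums exported above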
         for edge in self.graph.edges:
             from_node = edge[0]
             to_node = edge[1]
             if not isinstance(from_node, BaseResource) or not isinstance(
                 to_node, BaseResource
             ):
                 log.error(f"One of {from_node} and {to_node} is no base resource")
                 continue
             edge_dict = {"from": from_node.chksum, "to": to_node.chksum}
             if len(edge) == 3:
                 key = edge[2]
                 if isinstance(key, EdgeKey) and key.edge_type != EdgeType.default:
                     edge_dict["edge_type"] = key.edge_type.value
             edge_json = json.dumps(edge_dict) + "\n"
             self.tempfile.write(edge_json.encode())
             self.total_lines += 1
         elapsed_edges = time() - start_time
         log.debug(f"Exported {self.number_of_edges} edges in {elapsed_edges:.4f}s")
         elapsed = elapsed_nodes + elapsed_edges
         log.info(f"Exported {self.total_lines} nodes and edges in {elapsed:.4f}s")
         self.graph_exported = True
         del self.graph
         self.tempfile.seek(0)
Example #29
def collect_plugin_graph(collector_plugin: BaseCollectorPlugin,
                         args=None) -> Optional[Graph]:
    collector: BaseCollectorPlugin = collector_plugin()
    collector_name = f"collector_{collector.cloud}"
    resotolib.signal.set_thread_name(collector_name)

    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker")

    log.debug(f"Starting new collect process for {collector.cloud}")
    start_time = time()
    collector.start()
    collector.join(ArgumentParser.args.timeout)
    elapsed = time() - start_time
    if not collector.is_alive():  # The plugin has finished its work
        if not collector.finished:
            log.error(f"Plugin {collector.cloud} did not finish collection"
                      " - ignoring plugin results")
            return None
        if not collector.graph.is_dag_per_edge_type():
            log.error(f"Graph of plugin {collector.cloud} is not acyclic"
                      " - ignoring plugin results")
            return None
        log.info(
            f"Collector of plugin {collector.cloud} finished in {elapsed:.4f}s"
        )
        return collector.graph
    else:
        log.error(
            f"Plugin {collector.cloud} timed out - discarding Plugin graph")
        return None
Example #30
    def collect(collectors: List[BaseCollectorPlugin]) -> Optional[Graph]:
        graph = Graph(root=GraphRoot("root", {}))

        max_workers = min(len(collectors), ArgumentParser.args.pool_size)
        if max_workers == 0:
            log.error(
                "No workers configured or no collector plugins loaded - skipping collect"
            )
            return
        pool_args = {"max_workers": max_workers}
        if ArgumentParser.args.fork:
            pool_args["mp_context"] = multiprocessing.get_context("spawn")
            pool_args["initializer"] = resotolib.signal.initializer
            pool_executor = futures.ProcessPoolExecutor
            collect_args = {"args": ArgumentParser.args}
        else:
            pool_executor = futures.ThreadPoolExecutor
            collect_args = {}

        with pool_executor(**pool_args) as executor:
            wait_for = [
                executor.submit(
                    collect_plugin_graph,
                    collector,
                    **collect_args,
                ) for collector in collectors
            ]
            for future in futures.as_completed(wait_for):
                cluster_graph = future.result()
                if not isinstance(cluster_graph, Graph):
                    log.error(
                        f"Skipping invalid cluster_graph {type(cluster_graph)}"
                    )
                    continue
                graph.merge(cluster_graph)
        sanitize(graph)
        return graph