def send_graph(
    graph_export_iterator: GraphExportIterator,
    resotocore_base_uri: str,
    resotocore_graph: str,
):
    """Stream an exported graph (ndjson) to resotocore's merge endpoint.

    Raises RuntimeError if resotocore does not answer with HTTP 200.
    """
    merge_uri = f"{resotocore_base_uri}/graph/{resotocore_graph}/merge"
    log.debug(f"Sending graph via {merge_uri}")
    # Announce node/edge counts up front so the server knows what to expect.
    headers = {
        "Content-Type": "application/x-ndjson",
        "Resoto-Worker-Nodes": str(graph_export_iterator.number_of_nodes),
        "Resoto-Worker-Edges": str(graph_export_iterator.number_of_edges),
    }
    psk = getattr(ArgumentParser.args, "psk", None)
    if psk:
        encode_jwt_to_headers(headers, {}, psk)
    response = requests.post(
        merge_uri,
        data=graph_export_iterator,
        headers=headers,
    )
    if response.status_code != 200:
        log.error(response.content)
        raise RuntimeError(f"Failed to send graph: {response.content}")
    log.debug(f"resotocore reply: {response.content.decode()}")
    log.debug(f"Sent {graph_export_iterator.total_lines} items to resotocore")
def core_actions_processor(metrics: Metrics, query_uri: str, message: dict) -> Optional[dict]:
    """Process a single resotocore action message and build a reply.

    Fix: the return annotation claimed ``-> None`` although the function
    returns a reply dict for ``action`` messages (and None otherwise).

    :param metrics: the Metrics container to update
    :param query_uri: resotocore query endpoint used by update_metrics()
    :param message: the raw message dict received from the event bus
    :return: a reply dict (kind ``action_done``/``action_error``), or None
        for invalid or non-action messages
    """
    if not isinstance(message, dict):
        log.error(f"Invalid message: {message}")
        return None
    kind = message.get("kind")
    message_type = message.get("message_type")
    data = message.get("data")
    log.debug(
        f"Received message of kind {kind}, type {message_type}, data: {data}")
    if kind == "action":
        try:
            if message_type == "generate_metrics":
                start_time = time.time()
                update_metrics(metrics, query_uri)
                run_time = time.time() - start_time
                log.debug(f"Updated metrics for {run_time:.2f} seconds")
            else:
                raise ValueError(f"Unknown message type {message_type}")
        except Exception as e:
            log.exception(f"Failed to {message_type}: {e}")
            reply_kind = "action_error"
        else:
            # try/else: only reached when the action succeeded.
            reply_kind = "action_done"
        reply_message = {
            "kind": reply_kind,
            "message_type": message_type,
            "data": data,
        }
        return reply_message
def get_org_accounts(filter_current_account=False):
    """Return the IDs of all AWS Organization accounts.

    Pagination is followed via NextToken. If listing is not permitted the
    error is logged and whatever was collected so far is returned.

    :param filter_current_account: drop the current account ID from the result
    """
    session = aws_session()
    client = session.client("organizations")
    accounts = []
    try:
        request_kwargs = {}
        while True:
            response = client.list_accounts(**request_kwargs)
            accounts.extend(response.get("Accounts", []))
            token = response.get("NextToken")
            if token is None:
                break
            request_kwargs = {"NextToken": token}
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "AccessDeniedException":
            log.error(
                "AWS error - missing permissions to list organization accounts"
            )
        else:
            raise
    # -1 can never match an account ID, so nothing is filtered by default.
    filter_account_id = current_account_id() if filter_current_account else -1
    account_ids = [
        account["Id"]
        for account in accounts
        if account["Id"] != filter_account_id
    ]
    for account_id in account_ids:
        log.debug(f"AWS found org account {account_id}")
    log.info(f"AWS found a total of {len(account_ids)} org accounts")
    return account_ids
def __delitem__(self, key):
    """Delete a tag locally and, when attached to a resource, in the cloud.

    The local dict entry is only removed when the parent resource's
    delete_tag() reports success; on failure the method logs the error and
    the tag stays in the local dict (implicit None return). Exceptions are
    re-raised only when the parent resource opts in via
    _raise_tags_exceptions.
    """
    if self.parent_resource and isinstance(self.parent_resource, BaseResource):
        log.debug(f"Calling parent resource to delete tag {key} in cloud")
        try:
            if self.parent_resource.delete_tag(key):
                log_msg = f"Successfully deleted tag {key} in cloud"
                # Record that the tags of the parent resource changed.
                self.parent_resource._changes.add("tags")
                self.parent_resource.log(log_msg)
                log.info((f"{log_msg} for {self.parent_resource.kind}"
                          f" {self.parent_resource.id}"))
                # Only now remove the tag from the local dict.
                return super().__delitem__(key)
            else:
                log_msg = f"Error deleting tag {key} in cloud"
                self.parent_resource.log(log_msg)
                log.error((f"{log_msg} for {self.parent_resource.kind}"
                           f" {self.parent_resource.id}"))
        except Exception as e:
            log_msg = (
                f"Unhandled exception while trying to delete tag {key} in cloud:"
                f" {type(e)} {e}")
            self.parent_resource.log(log_msg, exception=e)
            if self.parent_resource._raise_tags_exceptions:
                raise
            else:
                log.exception(log_msg)
    else:
        # No parent resource: plain local dict deletion.
        return super().__delitem__(key)
def graph(self, query: str) -> Graph:
    """Run *query* against resotocore and build a local Graph from the result.

    Node lines are added first to an id -> node mapping; edge lines then
    connect previously seen nodes. Lines referencing unknown nodes are
    logged and skipped. The finished graph is sanitized before returning.
    """
    result_graph = Graph()
    node_by_id = {}

    def handle_line(entry: dict) -> None:
        """Add one node or edge entry of resotocore output to result_graph."""
        entry_type = entry.get("type")
        if entry_type == "node":
            node_id = entry.get("id")
            node = node_from_dict(entry)
            node_by_id[node_id] = node
            log.debug(f"Adding node {node} to the graph")
            result_graph.add_node(node)
            if node.kind == "graph_root":
                log.debug(f"Setting graph root {node}")
                result_graph.root = node
        elif entry_type == "edge":
            src_id = entry.get("from")
            dst_id = entry.get("to")
            edge_type = EdgeType.from_value(entry.get("edge_type"))
            if src_id not in node_by_id or dst_id not in node_by_id:
                raise ValueError(f"One of {src_id} -> {dst_id} unknown")
            result_graph.add_edge(
                node_by_id[src_id], node_by_id[dst_id], edge_type=edge_type
            )

    for entry in self.query(query):
        try:
            handle_line(entry)
        except ValueError as e:
            log.error(e)
            continue
    sanitize(result_graph)
    return result_graph
def add_event_listener(
    event_type: EventType,
    listener: Callable,
    blocking: bool = False,
    timeout: Optional[int] = None,
    one_shot: bool = False,
) -> bool:
    """Add an Event Listener.

    Fix: ``timeout`` defaulted to None while being annotated plain ``int``;
    it is now correctly ``Optional[int]``.

    :param event_type: the event to listen for
    :param listener: the callable invoked when the event is dispatched
    :param blocking: dispatch_event() waits for this listener to return
    :param timeout: max seconds a blocking dispatch waits; defaults to the
        configured event_timeout or 900 seconds
    :param one_shot: remove the listener after its first invocation
    :return: True if newly registered, False if invalid or already registered
    """
    if not callable(listener):
        log.error(
            f"Error registering {listener} of type {type(listener)} with event"
            f" {event_type.name}")
        return False
    if timeout is None:
        # Fall back to the configured event timeout, default 15 minutes.
        if hasattr(ArgumentParser.args, "event_timeout"):
            timeout = ArgumentParser.args.event_timeout
        else:
            timeout = 900
    log.debug(f"Registering {listener} with event {event_type.name}"
              f" (blocking: {blocking}, one-shot: {one_shot})")
    with _events_lock.write_access:
        if not event_listener_registered(event_type, listener):
            _events[event_type][listener] = {
                "blocking": blocking,
                "timeout": timeout,
                "one-shot": one_shot,
                # Lock serializes one-shot invocation/removal; pid lets
                # forked processes ignore listeners of other processes.
                "lock": Lock(),
                "pid": os.getpid(),
            }
            return True
        return False
def update_model(graph: Graph, resotocore_base_uri: str,
                 dump_json: bool = False, tempdir: Optional[str] = None) -> None:
    """PATCH the exported data model of *graph* to resotocore.

    Fixes: ``tempdir`` was annotated ``str`` but defaults to None (now
    ``Optional[str]``); the dump file used ``delete=not dump_json`` which
    is always False inside the ``if dump_json:`` branch — now written as
    the literal ``delete=False`` it always evaluated to.

    :param graph: graph whose model is exported and sent
    :param resotocore_base_uri: base URI of the resotocore instance
    :param dump_json: additionally write the model JSON to a temp file
    :param tempdir: directory for the dump file (system default if None)
    :raises RuntimeError: if resotocore does not answer with HTTP 200
    """
    model_uri = f"{resotocore_base_uri}/model"
    log.debug(f"Updating model via {model_uri}")
    model_json = json.dumps(graph.export_model(), indent=4)
    if dump_json:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        with tempfile.NamedTemporaryFile(
            prefix=f"resoto-model-{ts}-",
            suffix=".json",
            delete=False,  # keep the dump around for inspection
            dir=tempdir,
        ) as model_outfile:
            log.info(f"Writing model json to file {model_outfile.name}")
            model_outfile.write(model_json.encode())
    headers = {
        "Content-Type": "application/json",
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)
    r = requests.patch(model_uri, data=model_json, headers=headers)
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to create model: {r.content}")
def dispatch_event(event: Event, blocking: bool = False) -> None:
    """Dispatch an Event to all registered listeners.

    Each listener runs in its own thread; blocking listeners (or a blocking
    dispatch) are joined afterwards, bounded by their registered timeout.

    Fix: the original released the one-shot lock in ``finally`` even on the
    ``continue`` paths where it was never acquired — releasing an unacquired
    Lock raises RuntimeError (pid-mismatch path), and the acquire-failure
    path released a lock held by a concurrent dispatcher. Acquisition is
    now tracked explicitly and release/removal only happens when this call
    actually acquired the lock.
    """
    waiting_str = "" if blocking else "not "
    log.debug(
        f"Dispatching event {event.event_type.name} and {waiting_str}waiting for"
        " listeners to return")
    if event.event_type not in _events.keys():
        return
    with _events_lock.read_access:
        # Event listeners might unregister themselves during event dispatch
        # so we will work on a shallow copy while processing the current event.
        listeners = dict(_events[event.event_type])
    threads = {}
    for listener, listener_data in listeners.items():
        lock_acquired = False
        try:
            # Skip listeners registered by a different (forked) process.
            if listener_data["pid"] != os.getpid():
                continue
            if listener_data["one-shot"]:
                lock_acquired = listener_data["lock"].acquire(blocking=False)
                if not lock_acquired:
                    log.error(f"Not calling one-shot listener {listener} of type"
                              f" {type(listener)} - can't acquire lock")
                    continue
            log.debug(f"Calling listener {listener} of type {type(listener)}"
                      f" (blocking: {listener_data['blocking']})")
            thread_name = (f"{event.event_type.name.lower()}_event"
                           f"-{getattr(listener, '__name__', 'anonymous')}")
            t = Thread(target=listener, args=[event], name=thread_name)
            if blocking or listener_data["blocking"]:
                threads[t] = listener
            t.start()
        except Exception:
            log.exception("Caught unhandled event callback exception")
        finally:
            # Only remove/release the one-shot listener if we acquired it.
            if listener_data["one-shot"] and lock_acquired:
                log.debug(
                    f"One-shot specified for event {event.event_type.name} "
                    f"listener {listener} - removing event listener")
                remove_event_listener(event.event_type, listener)
                listener_data["lock"].release()
    start_time = time.time()
    for thread, listener in threads.items():
        # Remaining time budget for this listener, at least one second.
        timeout = start_time + listeners[listener]["timeout"] - time.time()
        if timeout < 1:
            timeout = 1
        log.debug(
            f"Waiting up to {timeout:.2f}s for event listener {thread.name} to finish"
        )
        thread.join(timeout)
        log.debug(
            f"Event listener {thread.name} finished (timeout: {thread.is_alive()})"
        )
def collect(self) -> None:
    """Collect all configured AWS accounts into self.graph.

    Builds the list of accounts from --aws-role/--aws-scrape-org/
    --aws-account, then collects them in a thread or process pool and
    merges each resulting per-account graph.
    """
    log.debug("plugin: AWS collecting resources")
    if not self.authenticated:
        log.error("Failed to authenticate - skipping collection")
        return
    # Warn about a redundant flag combination that scrapes the current
    # account both directly and via an assumed role.
    if (ArgumentParser.args.aws_assume_current
            and not ArgumentParser.args.aws_dont_scrape_current):
        log.warning(
            "You specified --aws-assume-current but not --aws-dont-scrape-current! "
            "This will result in the same account being scraped twice and is likely not what you want."
        )
    if ArgumentParser.args.aws_role and ArgumentParser.args.aws_scrape_org:
        # Scrape the whole organization with the given role, minus
        # explicitly excluded accounts.
        accounts = [
            AWSAccount(aws_account_id, {}, role=ArgumentParser.args.aws_role)
            for aws_account_id in get_org_accounts(
                filter_current_account=not ArgumentParser.args.aws_assume_current)
            if aws_account_id not in ArgumentParser.args.aws_scrape_exclude_account
        ]
        if not ArgumentParser.args.aws_dont_scrape_current:
            accounts.append(AWSAccount(current_account_id(), {}))
    elif ArgumentParser.args.aws_role and ArgumentParser.args.aws_account:
        # Scrape an explicit list of accounts with the given role.
        accounts = [
            AWSAccount(aws_account_id, {}, role=ArgumentParser.args.aws_role)
            for aws_account_id in ArgumentParser.args.aws_account
        ]
    else:
        # Default: only the current account.
        accounts = [AWSAccount(current_account_id(), {})]
    # Never spawn more workers than there are accounts.
    max_workers = (len(accounts)
                   if len(accounts) < ArgumentParser.args.aws_account_pool_size
                   else ArgumentParser.args.aws_account_pool_size)
    pool_args = {"max_workers": max_workers}
    if ArgumentParser.args.aws_fork:
        pool_args["mp_context"] = multiprocessing.get_context("spawn")
        pool_args["initializer"] = resotolib.signal.initializer
        pool_executor = futures.ProcessPoolExecutor
    else:
        pool_executor = futures.ThreadPoolExecutor
    with pool_executor(**pool_args) as executor:
        wait_for = [
            executor.submit(collect_account, account, self.regions,
                            ArgumentParser.args) for account in accounts
        ]
        for future in futures.as_completed(wait_for):
            account_graph = future.result()
            if not isinstance(account_graph, Graph):
                log.error(
                    f"Returned account graph has invalid type {type(account_graph)}"
                )
                continue
            self.graph.merge(account_graph)
def force_shutdown(delay: int = 10) -> None:
    """Hard-exit the process after *delay* seconds.

    Safety net for hung shutdowns: logs final stats and an error, then
    terminates immediately via os._exit (no cleanup handlers run).
    """
    time.sleep(delay)
    log_stats()
    log.error(
        "Some child process or thread timed out during shutdown"
        " - forcing shutdown completion"
    )
    os._exit(0)
def run(self) -> None:
    """Thread main loop: keep (re)connecting to the resotocore message bus
    until a shutdown is signalled, backing off ten seconds between attempts.
    """
    self.name = self.identifier
    add_event_listener(EventType.SHUTDOWN, self.shutdown)
    while not self.shutdown_event.is_set():
        log.info("Connecting to resotocore message bus")
        try:
            self.connect()
        except Exception as e:
            log.error(e)
        time.sleep(10)
def update_metrics(metrics: Metrics, query_uri: str) -> None:
    """Refresh all Prometheus metrics from resotocore query results.

    For every metric description, runs its query, builds gauge/counter
    families in ``metrics.staging``, and finally swaps staging into live.
    """
    metrics_descriptions = find_metrics()
    for _, data in metrics_descriptions.items():
        # Abort between metrics when a shutdown was requested.
        if shutdown_event.is_set():
            return
        metrics_query = data.get("query")
        metric_type = data.get("type")
        metric_help = data.get("help", "")
        if metrics_query is None:
            continue
        if metric_type not in ("gauge", "counter"):
            log.error(
                f"Do not know how to handle metrics of type {metric_type}")
            continue
        try:
            for result in query(metrics_query, query_uri):
                labels = get_labels_from_result(result)
                label_values = get_label_values_from_result(result, labels)
                for metric_name, metric_value in get_metrics_from_result(
                        result).items():
                    # Lazily create the metric family on first sighting.
                    if metric_name not in metrics.staging:
                        log.debug(
                            f"Adding metric {metric_name} of type {metric_type}"
                        )
                        if metric_type == "gauge":
                            metrics.staging[metric_name] = GaugeMetricFamily(
                                f"resoto_{metric_name}",
                                metric_help,
                                labels=labels,
                            )
                        elif metric_type == "counter":
                            metrics.staging[metric_name] = CounterMetricFamily(
                                f"resoto_{metric_name}",
                                metric_help,
                                labels=labels,
                            )
                    if metric_type == "counter" and metric_name in metrics.live:
                        # Counters must be monotonic: carry over the previous
                        # live value of the sample with the same label group.
                        current_metric = metrics.live[metric_name]
                        for sample in current_metric.samples:
                            if sample.labels == result.get("group"):
                                metric_value += sample.value
                                break
                    metrics.staging[metric_name].add_metric(
                        label_values, metric_value)
        except RuntimeError as e:
            log.error(e)
            continue
    # Atomically publish the freshly built metrics.
    metrics.swap()
def validate_graph_dataclasses_and_nodes(graph: Graph) -> None:
    """Validate dataclass attribute types of every resource in *graph* and
    log an error for any duplicate node checksum.
    """
    log.debug("Validating attribute types of all graph dataclasses")
    seen_chksums = {}
    for node in graph.nodes:
        if not isinstance(node, BaseResource):
            continue
        validate_dataclass(node)
        if node.chksum in seen_chksums:
            log.error(
                f"Duplicate checksum {node.chksum} for node {node.rtdname} in graph"
            )
        else:
            seen_chksums[node.chksum] = node
def shutdown(self, event: Event = None) -> None:
    """Shut down the message bus listener: signal the run loop, unregister
    every registered action (best-effort) and close the websocket.
    """
    log.debug(
        "Received shutdown event - shutting down resotocore message bus listener"
    )
    self.shutdown_event.set()
    for action_name in self.actions.keys():
        try:
            self.unregister(action_name)
        except Exception as e:
            # Best-effort: keep unregistering the remaining actions.
            log.error(e)
    if self.ws:
        self.ws.close()
def read(self) -> bool:
    """Load and validate the tag validator config file.

    Fix: the validation-failure path previously fell through and returned
    None despite the ``-> bool`` annotation; it now returns False
    explicitly (both are falsy, so callers are unaffected).

    :return: True when the config was read, validated and applied,
        False otherwise
    """
    if not self.config_file:
        log.error(
            "Attribute config_file is not set on TagValidatorConfig() instance"
        )
        return False
    with open(self.config_file) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    if self.validate(config):
        self.update(config)
        return True
    return False
def collect(self) -> None:
    """Run by resoto during the global collect() run.

    This method kicks off code that adds GCP resources to `self.graph`.
    When collect() finishes the parent thread will take `self.graph` and merge
    it with the global production graph.
    """
    log.debug("plugin: GCP collecting resources")
    credentials = Credentials.all()
    # If specific projects were requested, drop credentials of all others.
    if len(ArgumentParser.args.gcp_project) > 0:
        for project in list(credentials.keys()):
            if project not in ArgumentParser.args.gcp_project:
                del credentials[project]
    if len(credentials) == 0:
        return
    # Cap the pool size at the number of projects to collect.
    max_workers = (len(credentials)
                   if len(credentials) < ArgumentParser.args.gcp_project_pool_size
                   else ArgumentParser.args.gcp_project_pool_size)
    pool_args = {"max_workers": max_workers}
    if ArgumentParser.args.gcp_fork:
        pool_args["mp_context"] = multiprocessing.get_context("spawn")
        pool_args["initializer"] = resotolib.signal.initializer
        pool_executor = futures.ProcessPoolExecutor
        collect_args = {
            "args": ArgumentParser.args,
            # Credentials are only forwarded to subprocesses when all values
            # are None — presumably live credential objects can't be pickled
            # and each subprocess re-creates them; TODO confirm against
            # collect_project().
            "credentials": credentials
            if all(v is None for v in credentials.values())
            else None,
        }
    else:
        pool_executor = futures.ThreadPoolExecutor
        collect_args = {}
    with pool_executor(**pool_args) as executor:
        wait_for = [
            executor.submit(
                self.collect_project,
                project_id,
                **collect_args,
            ) for project_id in credentials.keys()
        ]
        for future in futures.as_completed(wait_for):
            project_graph = future.result()
            if not isinstance(project_graph, Graph):
                log.error(
                    f"Skipping invalid project_graph {type(project_graph)}"
                )
                continue
            self.graph.merge(project_graph)
def patch_nodes(self, graph: Graph):
    """PATCH all changed nodes of *graph* to resotocore as an ndjson stream.

    Raises RuntimeError if resotocore does not answer with HTTP 200.
    """
    headers = {"Content-Type": "application/x-ndjson"}
    psk = getattr(ArgumentParser.args, "psk", None)
    if psk:
        encode_jwt_to_headers(headers, {}, psk)
    response = requests.patch(
        f"{self.graph_uri}/nodes",
        data=GraphChangeIterator(graph),
        headers=headers,
    )
    if response.status_code == 200:
        return
    err = response.content.decode("utf-8")
    log.error(err)
    raise RuntimeError(f"Failed to patch nodes: {err}")
def delete(
    self,
    graph: Graph,
    snapshot_before_delete: bool = False,
    snapshot_timeout: int = 3600,
) -> bool:
    """Delete this EBS volume, optionally creating a snapshot first.

    :param graph: graph used to resolve the AWS session/account/region
    :param snapshot_before_delete: force a snapshot even if the resource
        itself does not request one
    :param snapshot_timeout: max seconds to wait for the snapshot
    :return: True when the volume was deleted, False when a requested
        snapshot did not reach the "completed" state
    :raises TimeoutError: when the snapshot stays pending past the timeout
    """
    ec2 = aws_resource(self, "ec2", graph)
    volume = ec2.Volume(self.id)
    if snapshot_before_delete or self.snapshot_before_delete:
        log_msg = "Creating snapshot before deletion"
        self.log(log_msg)
        log.debug(f"{log_msg} of {self.kind} {self.dname}")
        snapshot = volume.create_snapshot(
            Description=f"resoto created snapshot for volume {self.id}",
            TagSpecifications=[
                {
                    "ResourceType": "snapshot",
                    "Tags": [
                        {"Key": "Name", "Value": f"CK snap of {self.id}"},
                        {"Key": "owner", "Value": "resoto"},
                    ],
                },
            ],
        )
        start_utime = time.time()
        # Poll every 10s until the snapshot leaves "pending"; the snapshot
        # resource is re-fetched each iteration to refresh its cached state.
        while snapshot.state == "pending":
            if time.time() > start_utime + snapshot_timeout:
                raise TimeoutError(
                    (
                        f"AWS EC2 Volume Snapshot {self.dname} tag update timed out after "
                        f"{snapshot_timeout} seconds with status {snapshot.state} ({snapshot.state_message})"
                    )
                )
            time.sleep(10)
            log.debug(
                (
                    f"Waiting for snapshot {snapshot.id} to finish before deletion of "
                    f"{self.kind} {self.dname} - progress {snapshot.progress}"
                )
            )
            snapshot = ec2.Snapshot(snapshot.id)
        if snapshot.state != "completed":
            log_msg = f"Failed to create snapshot - status {snapshot.state} ({snapshot.state_message})"
            self.log(log_msg)
            log.error(
                (
                    f"{log_msg} for {self.kind} {self.dname} in "
                    f"account {self.account(graph).dname} region {self.region(graph).name}"
                )
            )
            return False
    volume.delete()
    return True
def wrapper(self, *args, **kwargs):
    """Invoke the wrapped method only when the resource is not protected;
    otherwise log the refusal and return False.
    """
    if not isinstance(self, BaseResource):
        raise ValueError(
            "unless_protected() only supports BaseResource type objects")
    if not self.protected:
        return f(self, *args, **kwargs)
    log.error(
        f"Resource {self.rtdname} is protected - refusing modification"
    )
    self.log(
        ("Modification was requested even though resource is protected"
         " - refusing"))
    return False
def pre_cleanup(self, graph=None) -> bool:
    """Run this resource's pre_delete hook, if it defines one.

    :param graph: graph used to resolve account/region; defaults to the
        resource's own graph
    :return: True when there is nothing to do or the hook succeeded,
        False when account/region can't be determined or the hook failed
    :raises RuntimeError: when called on a phantom resource
    """
    if not hasattr(self, "pre_delete"):
        return True
    if graph is None:
        graph = self._graph
    if self.phantom:
        raise RuntimeError(
            f"Can't cleanup phantom resource {self.rtdname}")
    if self.cleaned:
        log.debug(f"Resource {self.rtdname} has already been cleaned up")
        return True
    account = self.account(graph)
    region = self.region(graph)
    if not isinstance(account, BaseAccount) or not isinstance(
            region, BaseRegion):
        log.error(
            ("Could not determine account or region for pre cleanup of"
             f" {self.rtdname}"))
        return False
    log_suffix = f" in account {account.dname} region {region.name}"
    self.log("Trying to run pre clean up")
    log.debug(f"Trying to run pre clean up {self.rtdname}{log_suffix}")
    try:
        if not getattr(self, "pre_delete")(graph):
            self.log("Failed to run pre clean up")
            log.error(
                f"Failed to run pre clean up {self.rtdname}{log_suffix}")
            return False
        self.log("Successfully ran pre clean up")
        log.info(
            f"Successfully ran pre clean up {self.rtdname}{log_suffix}")
    except Exception as e:
        self.log("An error occurred during pre clean up", exception=e)
        log.exception(
            f"An error occurred during pre clean up {self.rtdname}{log_suffix}"
        )
        cloud = self.cloud(graph)
        # Record the failure in the prometheus exception counter.
        metrics_resource_pre_cleanup_exceptions.labels(
            cloud=cloud.name,
            account=account.dname,
            region=region.name,
            kind=self.kind,
        ).inc()
        return False
    return True
def create_graph(resotocore_base_uri: str, resotocore_graph: str):
    """Create the named graph in resotocore via an empty POST.

    Raises RuntimeError if resotocore does not answer with HTTP 200.
    """
    graph_uri = f"{resotocore_base_uri}/graph/{resotocore_graph}"
    log.debug(f"Creating graph {resotocore_graph} via {graph_uri}")
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
    }
    psk = getattr(ArgumentParser.args, "psk", None)
    if psk:
        encode_jwt_to_headers(headers, {}, psk)
    response = requests.post(graph_uri, data="", headers=headers)
    if response.status_code != 200:
        log.error(response.content)
        raise RuntimeError(f"Failed to create graph: {response.content}")
def add_edge(
    self,
    src: BaseResource,
    dst: BaseResource,
    key: EdgeKey = None,
    edge_type: EdgeType = None,
    **attr,
):
    """Add an edge from *src* to *dst*, defaulting the edge type and key,
    skipping None endpoints and duplicates, and notifying both resources
    of their new neighbor.
    """
    if src is None or dst is None:
        log.error(f"Not creating edge from or to NoneType: {src} to {dst}")
        return
    if edge_type is None:
        edge_type = EdgeType.default
    if key is None:
        key = EdgeKey(src=src, dst=dst, edge_type=edge_type)
    if self.has_edge(src, dst, key=key):
        log.error(f"Edge from {src} to {dst} already exists in graph")
        return
    return_key = super().add_edge(src, dst, key=key, **attr)
    if (self._log_edge_creation
            and isinstance(src, BaseResource)
            and isinstance(dst, BaseResource)):
        log.debug(
            f"Added edge from {src.rtdname} to {dst.rtdname} (type: {edge_type.value})"
        )
        # Tell each endpoint about its new neighbor; failures are logged
        # but never break edge creation.
        notifications = (
            (src, dst, "successor", src.successor_added),
            (dst, src, "predecessor", dst.predecessor_added),
        )
        for owner, other, role, callback in notifications:
            try:
                callback(other, self)
            except Exception:
                log.exception(
                    (
                        f"Unhandled exception while telling {owner.rtdname}"
                        f" that {other.rtdname} was added as a {role}"
                    )
                )
    return return_key
def post(uri, data, headers):
    """POST *data* to *uri* and yield each parsed ndjson line of the
    streamed reply.

    Fix: malformed JSON lines raise ``json.JSONDecodeError`` (a ValueError
    subclass), not ``TypeError`` — the old handler never caught decode
    failures, so one bad line aborted the whole stream. Also renamed the
    loop result variable, which shadowed the ``data`` parameter.

    :param uri: resotocore query endpoint
    :param data: request body (the query)
    :param headers: request headers; a PSK JWT is added when configured
    :raises RuntimeError: if resotocore does not answer with HTTP 200
    """
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)
    r = requests.post(uri, data=data, headers=headers, stream=True)
    if r.status_code != 200:
        log.error(r.content.decode())
        raise RuntimeError(f"Failed to query graph: {r.content.decode()}")
    for line in r.iter_lines():
        if not line:
            continue
        try:
            result = json.loads(line.decode("utf-8"))
        except json.JSONDecodeError as e:
            # Skip unparseable lines instead of aborting the stream.
            log.error(e)
            continue
        yield result
def run(self) -> None:
    """Thread main loop: start the daemon worker threads, then keep
    (re)connecting to the resotocore task queue until shutdown, backing
    off ten seconds between attempts.
    """
    self.name = self.identifier
    add_event_listener(EventType.SHUTDOWN, self.shutdown)
    for worker_num in range(self.max_workers):
        threading.Thread(
            target=self.worker, daemon=True, name=f"worker-{worker_num}"
        ).start()
    while not self.shutdown_event.is_set():
        log.info("Connecting to resotocore task queue")
        try:
            self.connect()
        except Exception as e:
            log.error(e)
        time.sleep(10)
def get(self) -> Dict:
    """Collect the changed attributes of the node, grouped by section.

    Fix: the event_log branch previously initialized ``changes[section]``,
    relying on the leftover loop variable happening to be "metadata" (the
    last tuple element) — it now targets ``changes["metadata"]`` explicitly.

    :return: dict of section -> {attribute: value}, plus the node's event
        log under metadata when it is non-empty
    """
    changes = {}
    for section in ("reported", "desired", "metadata"):
        for attribute in getattr(self, section, []):
            if section not in changes:
                changes[section] = {}
            try:
                changes[section][attribute] = getattr(self.node, attribute)
            except AttributeError:
                log.error(
                    f"Resource {self.node.rtdname} has no attribute {attribute}"
                )
    if len(self.node.event_log) > 0:
        if "metadata" not in changes:
            changes["metadata"] = {}
        changes["metadata"]["event_log"] = self.node.str_event_log
    return changes
def increase_limits() -> None:
    """Raise the soft RLIMIT_NOFILE and RLIMIT_NPROC limits to their hard
    maximum. No-op on anything but Linux; failures are logged, not raised.
    """
    if sys.platform != "linux":
        return
    for limit_name in ("RLIMIT_NOFILE", "RLIMIT_NPROC"):
        rlimit = getattr(resource, limit_name)
        soft_limit, hard_limit = resource.getrlimit(rlimit)
        log.debug(
            f"Current {limit_name} soft: {soft_limit} hard: {hard_limit}")
        if soft_limit >= hard_limit:
            continue
        try:
            log.debug(
                f"Increasing {limit_name} {soft_limit} -> {hard_limit}")
            resource.setrlimit(rlimit, (hard_limit, hard_limit))
        except ValueError:
            log.error(
                f"Failed to increase {limit_name} {soft_limit} -> {hard_limit}"
            )
def cleanup(self) -> None:
    """Run the cleanup phase: remove all nodes flagged for cleaning.

    Builds a cleanup plan from the delete-edge subgraph, runs pre-cleanup
    hooks in a thread pool, then cleans the nodes pass by pass.
    """
    if not ArgumentParser.args.cleanup:
        log.error(
            ("Cleanup called but --cleanup flag not provided at startup"
             " - ignoring call"))
        return
    log.info("Running cleanup")
    # create a subgraph of all the nodes that have a delete edge
    delete_graph = DiGraph(self.graph.edge_type_subgraph(EdgeType.delete))
    # from that graph delete all the nodes not marked for cleanup
    for node in list(delete_graph.nodes):
        if not node.clean:
            delete_graph.remove_node(node)
    # add all the nodes that are supposed to be cleaned
    # but do not have a delete edge so weren't part of the
    # subgraph
    for node in self.graph.nodes:
        if node.clean and node not in delete_graph:
            delete_graph.add_node(node)
    cleanup_nodes = list(delete_graph.nodes)
    for node in cleanup_nodes:
        log.debug(f"Adding {node.rtdname} to cleanup plan")
    log.debug(f"Sending {len(cleanup_nodes)} nodes to pre-cleanup pool")
    with ThreadPoolExecutor(
        max_workers=ArgumentParser.args.cleanup_pool_size,
        thread_name_prefix="pre_cleaner",
    ) as executor:
        executor.map(self.pre_clean, cleanup_nodes)
    log.debug(f"Running parallel cleanup on {len(cleanup_nodes)} nodes")
    parallel_pass_num = 1
    # NOTE(review): dependent_node_iterator presumably yields batches of
    # mutually independent nodes so each batch is safe to clean in
    # parallel — confirm in its implementation.
    for nodes in dependent_node_iterator(delete_graph):
        log.debug(
            f"Cleaning {len(nodes)} nodes in {ordinal(parallel_pass_num)} pass"
        )
        with ThreadPoolExecutor(
            max_workers=ArgumentParser.args.cleanup_pool_size,
            thread_name_prefix="cleaner",
        ) as executor:
            executor.map(self.clean, nodes)
        parallel_pass_num += 1
def export_graph(self):
    """Serialize the graph to ndjson in self.tempfile: all nodes first,
    then all edges.

    After export the in-memory graph is released (it can be large) and the
    tempfile is rewound so it can be streamed to resotocore.
    """
    with self.export_lock:
        start_time = time()
        for node in self.graph.nodes:
            node_dict = node_to_dict(node)
            if isinstance(node, self.graph_merge_kind):
                log.debug(f"Replacing sub graph below {node.rtdname}")
                if "metadata" not in node_dict or not isinstance(
                    node_dict["metadata"], dict
                ):
                    node_dict["metadata"] = {}
                # Marks this node so the sub-graph below it gets replaced
                # on the resotocore side during the merge.
                node_dict["metadata"]["replace"] = True
            node_json = json.dumps(node_dict) + "\n"
            self.tempfile.write(node_json.encode())
            self.total_lines += 1
        elapsed_nodes = time() - start_time
        log.debug(f"Exported {self.number_of_nodes} nodes in {elapsed_nodes:.4f}s")
        start_time = time()
        for edge in self.graph.edges:
            from_node = edge[0]
            to_node = edge[1]
            if not isinstance(from_node, BaseResource) or not isinstance(
                to_node, BaseResource
            ):
                log.error(f"One of {from_node} and {to_node} is no base resource")
                continue
            edge_dict = {"from": from_node.chksum, "to": to_node.chksum}
            # Multigraph edges carry the EdgeKey as third tuple element;
            # only non-default edge types are written out explicitly.
            if len(edge) == 3:
                key = edge[2]
                if isinstance(key, EdgeKey) and key.edge_type != EdgeType.default:
                    edge_dict["edge_type"] = key.edge_type.value
            edge_json = json.dumps(edge_dict) + "\n"
            self.tempfile.write(edge_json.encode())
            self.total_lines += 1
        elapsed_edges = time() - start_time
        log.debug(f"Exported {self.number_of_edges} edges in {elapsed_edges:.4f}s")
        elapsed = elapsed_nodes + elapsed_edges
        log.info(f"Exported {self.total_lines} nodes and edges in {elapsed:.4f}s")
        self.graph_exported = True
        # Free the graph memory — everything needed is now in the tempfile.
        del self.graph
        self.tempfile.seek(0)
def collect_plugin_graph(collector_plugin: BaseCollectorPlugin, args=None) -> Optional[Graph]:
    """Run a single collector plugin and return its graph.

    :param collector_plugin: plugin class to instantiate and run
    :param args: parsed args to install when running in a subprocess
    :return: the plugin's graph, or None when the plugin did not finish,
        produced a cyclic graph, or timed out
    """
    collector: BaseCollectorPlugin = collector_plugin()
    collector_name = f"collector_{collector.cloud}"
    resotolib.signal.set_thread_name(collector_name)
    # In a spawned subprocess, args and logging must be re-established.
    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker")
    log.debug(f"Starting new collect process for {collector.cloud}")
    start_time = time()
    collector.start()
    # Bound the plugin's runtime by the configured timeout.
    collector.join(ArgumentParser.args.timeout)
    elapsed = time() - start_time
    if not collector.is_alive():  # The plugin has finished its work
        if not collector.finished:
            log.error(f"Plugin {collector.cloud} did not finish collection"
                      " - ignoring plugin results")
            return None
        if not collector.graph.is_dag_per_edge_type():
            log.error(f"Graph of plugin {collector.cloud} is not acyclic"
                      " - ignoring plugin results")
            return None
        log.info(
            f"Collector of plugin {collector.cloud} finished in {elapsed:.4f}s"
        )
        return collector.graph
    else:
        # join() returned but the thread is still running: timed out.
        log.error(
            f"Plugin {collector.cloud} timed out - discarding Plugin graph")
        return None
def collect(collectors: List[BaseCollectorPlugin]) -> Optional[Graph]:
    """Run all collector plugins and merge their results into one graph.

    Fix: the annotation claimed ``-> Graph`` although the zero-worker path
    returns None; it is now ``Optional[Graph]`` and the bare ``return`` is
    an explicit ``return None``.

    :param collectors: collector plugin classes to run
    :return: the sanitized merged graph, or None when no workers are
        configured or no collector plugins are loaded
    """
    graph = Graph(root=GraphRoot("root", {}))
    # Never spawn more workers than there are collectors.
    max_workers = (len(collectors)
                   if len(collectors) < ArgumentParser.args.pool_size
                   else ArgumentParser.args.pool_size)
    if max_workers == 0:
        log.error(
            "No workers configured or no collector plugins loaded - skipping collect"
        )
        return None
    pool_args = {"max_workers": max_workers}
    if ArgumentParser.args.fork:
        # Spawned processes need args passed explicitly plus a fresh
        # signal handler setup.
        pool_args["mp_context"] = multiprocessing.get_context("spawn")
        pool_args["initializer"] = resotolib.signal.initializer
        pool_executor = futures.ProcessPoolExecutor
        collect_args = {"args": ArgumentParser.args}
    else:
        pool_executor = futures.ThreadPoolExecutor
        collect_args = {}
    with pool_executor(**pool_args) as executor:
        wait_for = [
            executor.submit(
                collect_plugin_graph,
                collector,
                **collect_args,
            ) for collector in collectors
        ]
        for future in futures.as_completed(wait_for):
            cluster_graph = future.result()
            if not isinstance(cluster_graph, Graph):
                log.error(
                    f"Skipping invalid cluster_graph {type(cluster_graph)}"
                )
                continue
            graph.merge(cluster_graph)
    sanitize(graph)
    return graph