def pre_delete(self, graph: Graph) -> bool:
    if self.association_id is not None:
        ec2 = aws_client(self, "ec2", graph=graph)
        ec2.disassociate_address(AssociationId=self.association_id)
    else:
        log.debug(f"No association for {self.rtdname}")
    return True
def pre_delete(self, graph: Graph) -> bool:
    ec2 = aws_resource(self, "ec2", graph)
    security_group = ec2.SecurityGroup(self.id)
    remove_ingress = []
    remove_egress = []
    # Collect rules that reference other security groups (UserIdGroupPairs)
    # for revocation before the group itself gets deleted.
    for permission in security_group.ip_permissions:
        if "UserIdGroupPairs" in permission and len(permission["UserIdGroupPairs"]) > 0:
            p = copy.deepcopy(permission)
            remove_ingress.append(p)
            log.debug(f"Adding incoming permission {p} of {self.kind} {self.dname} to removal list")
    for permission in security_group.ip_permissions_egress:
        if "UserIdGroupPairs" in permission and len(permission["UserIdGroupPairs"]) > 0:
            p = copy.deepcopy(permission)
            remove_egress.append(p)
            log.debug(f"Adding outgoing permission {p} of {self.kind} {self.dname} to removal list")
    if len(remove_ingress) > 0:
        security_group.revoke_ingress(IpPermissions=remove_ingress)
    if len(remove_egress) > 0:
        security_group.revoke_egress(IpPermissions=remove_egress)
    return True
def do_action(self, data: Dict) -> None: log.debug("Cleanup Untagged called") cg = CoreGraph() self.config.read() # runtime read in case config file was updated since last run tags_part = 'not(has_key(tags, ["' + '", "'.join(self.config["tags"]) + '"]))' kinds_part = 'is(["' + '", "'.join(self.config["kinds"]) + '"])' account_parts = [] for cloud_id, account in self.config["accounts"].items(): for account_id, account_data in account.items(): age = delta_to_str(account_data.get("age")) account_part = ( f'(/ancestors.cloud.id == "{cloud_id}" and ' f'/ancestors.account.id == "{account_id}" and ' f"age > {age})" ) account_parts.append(account_part) accounts_part = "(" + " or ".join(account_parts) + ")" exclusion_part = "/metadata.protected == false and /metadata.phantom == false and /metadata.cleaned == false" required_tags = ", ".join(self.config["tags"]) reason = ( f"Missing one or more of required tags {required_tags}" " and age more than threshold" ) command = f'query {exclusion_part} and {kinds_part} and {tags_part} and {accounts_part} | clean "{reason}"' for node_data in cg.execute(command): node = node_from_dict(node_data) log.debug( f"Marking {node.rtdname} with age {node.age} for cleanup for" f" missing one or more of tags: {required_tags}" )
def core_actions_processor(metrics: Metrics, query_uri: str, message: dict) -> Optional[dict]:
    if not isinstance(message, dict):
        log.error(f"Invalid message: {message}")
        return None
    kind = message.get("kind")
    message_type = message.get("message_type")
    data = message.get("data")
    log.debug(f"Received message of kind {kind}, type {message_type}, data: {data}")
    if kind == "action":
        try:
            if message_type == "generate_metrics":
                start_time = time.time()
                update_metrics(metrics, query_uri)
                run_time = time.time() - start_time
                log.debug(f"Updated metrics in {run_time:.2f} seconds")
            else:
                raise ValueError(f"Unknown message type {message_type}")
        except Exception as e:
            log.exception(f"Failed to {message_type}: {e}")
            reply_kind = "action_error"
        else:
            reply_kind = "action_done"
        reply_message = {
            "kind": reply_kind,
            "message_type": message_type,
            "data": data,
        }
        return reply_message
    return None
def collect_plugin_graph(collector_plugin: BaseCollectorPlugin, args=None) -> Optional[Graph]:
    collector: BaseCollectorPlugin = collector_plugin()
    collector_name = f"collector_{collector.cloud}"
    resotolib.signal.set_thread_name(collector_name)
    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker")
    log.debug(f"Starting new collect process for {collector.cloud}")
    start_time = time()
    collector.start()
    collector.join(ArgumentParser.args.timeout)
    elapsed = time() - start_time
    if not collector.is_alive():
        # The plugin has finished its work
        if not collector.finished:
            log.error(f"Plugin {collector.cloud} did not finish collection - ignoring plugin results")
            return None
        if not collector.graph.is_dag_per_edge_type():
            log.error(f"Graph of plugin {collector.cloud} is not acyclic - ignoring plugin results")
            return None
        log.info(f"Collector of plugin {collector.cloud} finished in {elapsed:.4f}s")
        return collector.graph
    else:
        log.error(f"Plugin {collector.cloud} timed out - discarding plugin graph")
        return None
def collect_account(account: AWSAccount, regions: List, args=None):
    collector_name = f"aws_{account.id}"
    resotolib.signal.set_thread_name(collector_name)
    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker-aws")
    log.debug(f"Starting new collect process for account {account.dname}")
    aac = AWSAccountCollector(regions, account)
    try:
        aac.collect()
    except botocore.exceptions.ClientError as e:
        log.exception(
            f"An AWS {e.response['Error']['Code']} error occurred while collecting account {account.dname}"
        )
        metrics_unhandled_account_exceptions.labels(account=account.dname).inc()
    except Exception:
        log.exception(f"An unhandled error occurred while collecting AWS account {account.dname}")
        metrics_unhandled_account_exceptions.labels(account=account.dname).inc()
    return aac.graph
def get_org_accounts(filter_current_account=False):
    session = aws_session()
    client = session.client("organizations")
    accounts = []
    try:
        response = client.list_accounts()
        accounts = response.get("Accounts", [])
        while response.get("NextToken") is not None:
            response = client.list_accounts(NextToken=response["NextToken"])
            accounts.extend(response.get("Accounts", []))
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "AccessDeniedException":
            log.error("AWS error - missing permissions to list organization accounts")
        else:
            raise
    # -1 is a sentinel that never matches a real account id
    filter_account_id = current_account_id() if filter_current_account else -1
    accounts = [
        aws_account["Id"]
        for aws_account in accounts
        if aws_account["Id"] != filter_account_id
    ]
    for account in accounts:
        log.debug(f"AWS found org account {account}")
    log.info(f"AWS found a total of {len(accounts)} org accounts")
    return accounts
def add_event_listener(
    event_type: EventType,
    listener: Callable,
    blocking: bool = False,
    timeout: Optional[int] = None,
    one_shot: bool = False,
) -> bool:
    """Add an Event Listener"""
    if not callable(listener):
        log.error(f"Error registering {listener} of type {type(listener)} with event {event_type.name}")
        return False
    if timeout is None:
        if hasattr(ArgumentParser.args, "event_timeout"):
            timeout = ArgumentParser.args.event_timeout
        else:
            timeout = 900
    log.debug(
        f"Registering {listener} with event {event_type.name}"
        f" (blocking: {blocking}, one-shot: {one_shot})"
    )
    with _events_lock.write_access:
        if not event_listener_registered(event_type, listener):
            _events[event_type][listener] = {
                "blocking": blocking,
                "timeout": timeout,
                "one-shot": one_shot,
                "lock": Lock(),
                "pid": os.getpid(),
            }
            return True
        return False
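# A minimal usage sketch (illustration, not from the source): register a
# one-shot listener for the SHUTDOWN event used elsewhere in this module and
# dispatch it via dispatch_event() below. The listener itself and the
# assumption that the payload is exposed as `event.data` are hypothetical.
def on_shutdown(event: Event) -> None:
    log.info(f"Shutting down: {event.data.get('reason')}")  # assumes Event exposes .data

add_event_listener(EventType.SHUTDOWN, on_shutdown, blocking=True, one_shot=True)
dispatch_event(Event(EventType.SHUTDOWN, {"reason": "example", "emergency": False}))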
def handler(sig, frame) -> None:
    """Handles Ctrl+C by letting the Collector() know to shut down"""
    current_pid = os.getpid()
    if current_pid == parent_pid:
        reason = f"Received shutdown signal {sig}"
        log.debug(f"Parent caught signal {sig} - dispatching shutdown event")
        # Dispatch shutdown event in parent process which also causes SIGTERM to be sent
        # to the process group and in turn causes the shutdown event in all child
        # processes.
        dispatch_event(Event(EventType.SHUTDOWN, {"reason": reason, "emergency": False}))
    else:
        reason = f"Received shutdown signal {sig} from parent process"
        log.debug(
            f"Child with PID {current_pid} shutting down"
            " - you might see exceptions from interrupted worker threads"
        )
        # Child's threads have 3s to shut down before the following thread will
        # shut them down hard.
        kt = threading.Thread(target=delayed_exit, name="shutdown")
        kt.start()
        # Dispatch shutdown event in child process
        dispatch_event(
            Event(EventType.SHUTDOWN, {"reason": reason, "emergency": False}),
            blocking=False,
        )
        sys.exit(0)
def update_model(graph: Graph, resotocore_base_uri: str, dump_json: bool = False, tempdir: Optional[str] = None) -> None:
    model_uri = f"{resotocore_base_uri}/model"
    log.debug(f"Updating model via {model_uri}")
    model_json = json.dumps(graph.export_model(), indent=4)
    if dump_json:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        with tempfile.NamedTemporaryFile(
            prefix=f"resoto-model-{ts}-",
            suffix=".json",
            delete=False,  # keep the dump on disk; this branch only runs when dump_json is True
            dir=tempdir,
        ) as model_outfile:
            log.info(f"Writing model json to file {model_outfile.name}")
            model_outfile.write(model_json.encode())
    headers = {
        "Content-Type": "application/json",
    }
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)
    r = requests.patch(model_uri, data=model_json, headers=headers)
    if r.status_code != 200:
        log.error(r.content)
        raise RuntimeError(f"Failed to update model: {r.content}")
def __delitem__(self, key):
    if self.parent_resource and isinstance(self.parent_resource, BaseResource):
        log.debug(f"Calling parent resource to delete tag {key} in cloud")
        try:
            if self.parent_resource.delete_tag(key):
                log_msg = f"Successfully deleted tag {key} in cloud"
                self.parent_resource._changes.add("tags")
                self.parent_resource.log(log_msg)
                log.info(f"{log_msg} for {self.parent_resource.kind} {self.parent_resource.id}")
                return super().__delitem__(key)
            else:
                log_msg = f"Error deleting tag {key} in cloud"
                self.parent_resource.log(log_msg)
                log.error(f"{log_msg} for {self.parent_resource.kind} {self.parent_resource.id}")
        except Exception as e:
            log_msg = (
                f"Unhandled exception while trying to delete tag {key} in cloud:"
                f" {type(e)} {e}"
            )
            self.parent_resource.log(log_msg, exception=e)
            if self.parent_resource._raise_tags_exceptions:
                raise
            else:
                log.exception(log_msg)
    else:
        return super().__delitem__(key)
def collect_project(project_id: str, args=None, credentials=None) -> Optional[Graph]:
    """Collect an individual project.

    Called from collect() and run either within a thread or in a spawned
    process, depending on whether `--gcp-fork` was specified.

    Because a spawned process does not inherit any of our memory or file
    descriptors, the already parsed `args` Namespace() is passed to this
    method.
    """
    project = GCPProject(project_id, {})
    collector_name = f"gcp_{project.id}"
    resotolib.signal.set_thread_name(collector_name)
    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker-gcp")
    if credentials is not None:
        Credentials._credentials = credentials
        Credentials._initialized = True
    log.debug(f"Starting new collect process for project {project.dname}")
    try:
        gpc = GCPProjectCollector(project)
        gpc.collect()
    except Exception:
        log.exception(f"An unhandled error occurred while collecting {project.rtdname}")
    else:
        return gpc.graph
def shutdown(self, event: Optional[Event] = None) -> None:
    log.debug("Received shutdown event - shutting down resotocore task queue listener")
    self.shutdown_event.set()
    if self.ws:
        self.ws.close()
def do_action(self, data: Dict) -> None: log.info("Protect Snowflakes called") self.config.read() cg = CoreGraph() resource_parts = [] for cloud_id, accounts in self.config.items(): for account_id, regions in accounts.items(): for region_id, kinds in regions.items(): for kind, resources in kinds.items(): for resource_id in resources: log.debug( f"Protecting {resource_id} of kind {kind} in" f" region {region_id} account {account_id}" f" cloud {cloud_id}") resource_parts.append( f'(reported.id == "{resource_id}"' f' and reported.kind == "{kind}"' f' and metadata.ancestors.region.id == "{region_id}"' f' and metadata.ancestors.cloud.id == "{cloud_id}")' ) resource_part = " or ".join(resource_parts) command = f"query {resource_part} | protect" for node_data in cg.execute(command): node = node_from_dict(node_data) log.debug(f"Protected {node.rtdname}")
def kill_children(signal: Signals = SIGTERM, ensure_death: bool = False, timeout: int = 3) -> None:
    procs = psutil.Process().children(recursive=True)
    num_children = len(procs)
    if num_children == 0:
        return
    elif num_children == 1:
        log_suffix = ""
    else:
        log_suffix = "ren"
    log.debug(f"Sending {signal.name} to {num_children} child{log_suffix}.")
    for p in procs:
        if signal == SIGTERM:
            p.terminate()
        else:
            p.send_signal(signal)
    if ensure_death:
        _, alive = psutil.wait_procs(procs, timeout=timeout)
        for p in alive:
            log.debug(f"Child with PID {p.pid} is still alive, sending SIGKILL")
            p.kill()
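# Usage sketch (illustration, not from the source): politely terminate all
# child processes, then SIGKILL anything still alive after five seconds.
# SIGTERM comes from the standard library's signal module.
from signal import SIGTERM

kill_children(SIGTERM, ensure_death=True, timeout=5)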
def resolve_deferred_connections(self):
    if getattr(ArgumentParser.args, "ignore_deferred_connections", False):
        log.debug("Ignoring deferred graph connections")
        return
    log.debug("Resolving deferred graph connections")
    for node in self.nodes:
        if isinstance(node, BaseResource):
            node.resolve_deferred_connections(self)
def search_first_all(self, match: Dict):
    """Return the first graph node that matches multiple attributes and values"""
    node = next(iter(self.searchall(match)), None)
    if node:
        log.debug(f"Found node {node} with {match}")
    else:
        log.debug(f"Found no node with {match}")
    return node
def search_first(self, attr, value):
    """Return the first graph node that matches a certain attribute value"""
    node = next(iter(self.search(attr, value)), None)
    if node:
        log.debug(f"Found node {node} with {attr}: {value}")
    else:
        log.debug(f"Found no node with {attr}: {value}")
    return node
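# Usage sketch (illustration, not from the source): the two lookup helpers
# above differ only in how the match is expressed; both return the first hit
# or None. The graph instance and attribute values here are hypothetical.
instance = graph.search_first("id", "i-0123456789abcdef0")
node = graph.search_first_all({"kind": "aws_ec2_instance", "name": "web-1"})
if node is None:
    log.debug("No matching node in graph")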
def query(self, query: str, edge_type: Optional[EdgeType] = None):
    log.debug(f"Sending query {query}")
    headers = {"Accept": "application/x-ndjson"}
    query_endpoint = self.query_uri
    if edge_type is not None:
        query_string = urlencode({"edge_type": edge_type.value})
        query_endpoint += f"?{query_string}"
    return self.post(query_endpoint, query, headers)
def execute(self, command: str):
    log.debug(f"Executing command: {command}")
    headers = {"Accept": "application/x-ndjson", "Content-Type": "text/plain"}
    execute_endpoint = f"{self.base_uri}/cli/execute"
    if self.graph_name:
        query_string = urlencode({"graph": self.graph_name})
        execute_endpoint += f"?{query_string}"
    return self.post(execute_endpoint, command, headers)
def __iter__(self):
    for node in self.graph.nodes:
        if not node.changes.changed:
            continue
        node_dict = node_to_dict(node, changes_only=True)
        node_json = json.dumps(node_dict) + "\n"
        log.debug(f"Updating node {node_dict}")
        yield node_json.encode()
def regions(self) -> List:
    if len(self.__regions) == 0:
        if not ArgumentParser.args.aws_region:
            log.debug("AWS region not specified, assuming all regions")
            self.__regions = all_regions()
        else:
            self.__regions = ArgumentParser.args.aws_region
    return self.__regions
def get_configs(resotocore_uri: Optional[str] = None, psk: Optional[str] = None) -> Dict:
    resotocore_uri, psk, headers = default_args(resotocore_uri, psk)
    log.debug("Getting configs")
    r = requests.get(f"{resotocore_uri}/configs", headers=headers)
    if r.status_code == 200:
        return r.json()
    raise RuntimeError(f"Error getting configs: {r.content.decode('utf-8')}")
def clean(self, value: bool) -> None:
    if self.phantom and value:
        raise ValueError(f"Can't cleanup phantom resource {self.rtdname}")
    clean_str = "" if value else "not "
    self.log(f"Setting to {clean_str}be cleaned")
    log.debug(f"Setting {self.rtdname} to {clean_str}be cleaned")
    self._changes.add("clean")
    self._clean = value
def dispatch_event(event: Event, blocking: bool = False) -> None:
    """Dispatch an Event"""
    waiting_str = "" if blocking else "not "
    log.debug(
        f"Dispatching event {event.event_type.name} and {waiting_str}waiting for"
        " listeners to return"
    )
    if event.event_type not in _events.keys():
        return
    with _events_lock.read_access:
        # Event listeners might unregister themselves during event dispatch
        # so we will work on a shallow copy while processing the current event.
        listeners = dict(_events[event.event_type])
    threads = {}
    for listener, listener_data in listeners.items():
        try:
            if listener_data["pid"] != os.getpid():
                continue
            if listener_data["one-shot"] and not listener_data["lock"].acquire(blocking=False):
                log.error(
                    f"Not calling one-shot listener {listener} of type"
                    f" {type(listener)} - can't acquire lock"
                )
                continue
            log.debug(
                f"Calling listener {listener} of type {type(listener)}"
                f" (blocking: {listener_data['blocking']})"
            )
            thread_name = (
                f"{event.event_type.name.lower()}_event"
                f"-{getattr(listener, '__name__', 'anonymous')}"
            )
            t = Thread(target=listener, args=[event], name=thread_name)
            if blocking or listener_data["blocking"]:
                threads[t] = listener
            t.start()
        except Exception:
            log.exception("Caught unhandled event callback exception")
        finally:
            if listener_data["one-shot"]:
                log.debug(
                    f"One-shot specified for event {event.event_type.name} "
                    f"listener {listener} - removing event listener"
                )
                remove_event_listener(event.event_type, listener)
                listener_data["lock"].release()
    start_time = time.time()
    for thread, listener in threads.items():
        timeout = start_time + listeners[listener]["timeout"] - time.time()
        if timeout < 1:
            timeout = 1
        log.debug(f"Waiting up to {timeout:.2f}s for event listener {thread.name} to finish")
        thread.join(timeout)
        log.debug(f"Event listener {thread.name} finished (timeout: {thread.is_alive()})")
def collect(self) -> None: log.debug("plugin: AWS collecting resources") if not self.authenticated: log.error("Failed to authenticate - skipping collection") return if (ArgumentParser.args.aws_assume_current and not ArgumentParser.args.aws_dont_scrape_current): log.warning( "You specified --aws-assume-current but not --aws-dont-scrape-current! " "This will result in the same account being scraped twice and is likely not what you want." ) if ArgumentParser.args.aws_role and ArgumentParser.args.aws_scrape_org: accounts = [ AWSAccount(aws_account_id, {}, role=ArgumentParser.args.aws_role) for aws_account_id in get_org_accounts( filter_current_account=not ArgumentParser.args. aws_assume_current) if aws_account_id not in ArgumentParser.args.aws_scrape_exclude_account ] if not ArgumentParser.args.aws_dont_scrape_current: accounts.append(AWSAccount(current_account_id(), {})) elif ArgumentParser.args.aws_role and ArgumentParser.args.aws_account: accounts = [ AWSAccount(aws_account_id, {}, role=ArgumentParser.args.aws_role) for aws_account_id in ArgumentParser.args.aws_account ] else: accounts = [AWSAccount(current_account_id(), {})] max_workers = (len(accounts) if len(accounts) < ArgumentParser.args.aws_account_pool_size else ArgumentParser.args.aws_account_pool_size) pool_args = {"max_workers": max_workers} if ArgumentParser.args.aws_fork: pool_args["mp_context"] = multiprocessing.get_context("spawn") pool_args["initializer"] = resotolib.signal.initializer pool_executor = futures.ProcessPoolExecutor else: pool_executor = futures.ThreadPoolExecutor with pool_executor(**pool_args) as executor: wait_for = [ executor.submit(collect_account, account, self.regions, ArgumentParser.args) for account in accounts ] for future in futures.as_completed(wait_for): account_graph = future.result() if not isinstance(account_graph, Graph): log.error( f"Returned account graph has invalid type {type(account_graph)}" ) continue self.graph.merge(account_graph)
def remove_event_listener(event_type: EventType, listener: Callable) -> bool:
    """Remove an Event Listener"""
    with _events_lock.write_access:
        if event_listener_registered(event_type, listener):
            log.debug(f"Removing {listener} from event {event_type.name}")
            del _events[event_type][listener]
            if len(_events[event_type]) == 0:
                del _events[event_type]
            return True
        return False
def get_config(config_id: str, resotocore_uri: Optional[str] = None, psk: Optional[str] = None) -> Dict:
    resotocore_uri, psk, headers = default_args(resotocore_uri, psk)
    log.debug(f"Getting config {config_id}")
    r = requests.get(f"{resotocore_uri}/config/{config_id}", headers=headers)
    if r.status_code == 200:
        return r.json()
    elif r.status_code == 404:
        raise ConfigNotFoundError(f"Config {config_id} does not exist")
    raise RuntimeError(f"Error getting config {config_id}: {r.content.decode('utf-8')}")
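# Usage sketch (illustration, not from the source): fetch a config and fall
# back to an empty dict when it does not exist, relying on the
# ConfigNotFoundError raised above. The config id is hypothetical.
try:
    config = get_config("resoto.worker")
except ConfigNotFoundError:
    config = {}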
def pre_delete(self, graph: Graph) -> bool:
    iam = aws_resource(self, "iam", graph)
    instance_profile = iam.InstanceProfile(self.name)
    for predecessor in self.predecessors(graph, edge_type=EdgeType.delete):
        if isinstance(predecessor, AWSIAMRole):
            log_msg = f"Detaching {predecessor.rtdname}"
            self.log(log_msg)
            log.debug(f"{log_msg} for deletion of {self.rtdname}")
            instance_profile.remove_role(RoleName=predecessor.name)
    return True
def delete_config(config_id: str, resotocore_uri: Optional[str] = None, psk: Optional[str] = None) -> bool:
    resotocore_uri, psk, headers = default_args(resotocore_uri, psk)
    log.debug(f"Deleting config {config_id}")
    r = requests.delete(f"{resotocore_uri}/config/{config_id}", headers=headers)
    if r.status_code == 204:
        return True
    raise RuntimeError(f"Error deleting config {config_id}: {r.content.decode('utf-8')}")