Beispiel #1
0
def add_event_listener(
    event_type: EventType,
    listener: Callable,
    blocking: bool = False,
    timeout: int = 900,
    one_shot: bool = False,
) -> bool:
    """Register a listener for an event type.

    Args:
        event_type: Event type the listener is registered under.
        listener: Object to register; rejected if it is not callable.
        blocking: Stored with the registration under the "blocking" key.
        timeout: Stored with the registration under the "timeout" key.
        one_shot: Stored with the registration under the "one-shot" key.

    Returns:
        True if the listener was newly registered. False if it is not
        callable or is already registered for this event type.
    """
    if not callable(listener):
        error_msg = (
            f"Error registering {listener} of type {type(listener)} with event"
            f" {event_type.name}"
        )
        log.error(error_msg)
        return False

    debug_msg = (
        f"Registering {listener} with event {event_type.name}"
        f" (blocking: {blocking}, one-shot: {one_shot})"
    )
    log.debug(debug_msg)

    with _events_lock.write_access:
        if event_listener_registered(event_type, listener):
            return False

        # Besides the caller supplied options every registration gets its
        # own lock and remembers the PID of the registering process.
        registration = {
            "blocking": blocking,
            "timeout": timeout,
            "one-shot": one_shot,
            "lock": Lock(),
            "pid": os.getpid(),
        }
        _events[event_type][listener] = registration
        return True
Beispiel #2
0
 def __delitem__(self, key):
     """Delete a tag, deleting it via the parent resource first if there is one.

     Without a parent resource that is a BaseResource the key is simply
     removed from this container. Otherwise the parent's delete_tag() is
     called and the key is only removed locally if that call reports
     success. Exceptions are logged on the parent resource and re-raised
     only if the parent's _raise_tags_exceptions flag is set.
     """
     parent = self.parent_resource
     if not (parent and isinstance(parent, BaseResource)):
         return super().__delitem__(key)

     log.debug(f"Calling parent resource to delete tag {key} in cloud")
     try:
         deleted = parent.delete_tag(key)
         if not deleted:
             # The local entry is intentionally kept when the cloud side
             # reports a failure.
             log_msg = f"Error deleting tag {key} in cloud"
             parent.log(log_msg)
             log.error(f"{log_msg} for {parent.kind} {parent.id}")
         else:
             log_msg = f"Successfully deleted tag {key} in cloud"
             parent._changes.add("tags")
             parent.log(log_msg)
             log.info(f"{log_msg} for {parent.kind} {parent.id}")
             return super().__delitem__(key)
     except Exception as e:
         log_msg = f"Unhandled exception while trying to delete tag {key} in cloud: {type(e)} {e}"
         parent.log(log_msg, exception=e)
         if not parent._raise_tags_exceptions:
             log.exception(log_msg)
         else:
             raise
Beispiel #3
0
def core_actions_processor(metrics: Metrics, search_uri: str,
                           tls_data: TLSData, message: dict) -> None:
    """Process an action message and build the reply for it.

    Only messages of kind "action" are handled and the only known message
    type is "generate_metrics", which runs update_metrics().

    Args:
        metrics: Passed through to update_metrics().
        search_uri: Passed through to update_metrics().
        tls_data: Passed through to update_metrics().
        message: Incoming message; expected to be a dict with the keys
            "kind", "message_type" and "data".

    Returns:
        NOTE(review): despite the ``-> None`` annotation a reply dict is
        returned for messages of kind "action". Its "kind" is "action_done"
        on success and "action_error" if processing raised. For anything
        else None is returned.
    """
    if not isinstance(message, dict):
        log.error(f"Invalid message: {message}")
        return

    kind = message.get("kind")
    message_type = message.get("message_type")
    data = message.get("data")
    log.debug(
        f"Received message of kind {kind}, type {message_type}, data: {data}")

    if kind != "action":
        return

    try:
        if message_type != "generate_metrics":
            raise ValueError(f"Unknown message type {message_type}")
        started = time.time()
        update_metrics(metrics, search_uri, tls_data)
        run_time = time.time() - started
        log.debug(f"Updated metrics for {run_time:.2f} seconds")
    except Exception as e:
        log.exception(f"Failed to {message_type}: {e}")
        reply_kind = "action_error"
    else:
        reply_kind = "action_done"

    return {
        "kind": reply_kind,
        "message_type": message_type,
        "data": data,
    }
Beispiel #4
0
def handler(sig, frame) -> None:
    """Signal handler that turns a received signal into a SHUTDOWN event.

    In the parent process (PID equals ``parent_pid``) a SHUTDOWN event is
    dispatched. In any other process a thread running ``delayed_exit`` is
    started, the SHUTDOWN event is dispatched non-blocking and the process
    exits with status 0.

    Args:
        sig: Signal that was received; only used in log and reason texts.
        frame: Not used.
    """
    current_pid = os.getpid()
    if current_pid != parent_pid:
        reason = f"Received shutdown signal {sig} from parent process"
        log.debug(
            f"Child with PID {current_pid} shutting down"
            " - you might see exceptions from interrupted worker threads")
        # Child's threads have 3s to shut down before the following thread will
        # shut them down hard.
        killer = threading.Thread(target=delayed_exit, name="shutdown")
        killer.start()
        # Dispatch shutdown event in child process
        shutdown_event = Event(EventType.SHUTDOWN, {
            "reason": reason,
            "emergency": False
        })
        dispatch_event(shutdown_event, blocking=False)
        sys.exit(0)
    else:
        reason = f"Received shutdown signal {sig}"
        log.debug(f"Parent caught signal {sig} - dispatching shutdown event")
        # Dispatch shutdown event in parent process which also causes SIGTERM to be sent
        # to the process group and in turn causes the shutdown event in all child
        # processes.
        shutdown_event = Event(EventType.SHUTDOWN, {
            "reason": reason,
            "emergency": False
        })
        dispatch_event(shutdown_event)
Beispiel #5
0
    def pre_delete(self, graph: Graph) -> bool:
        """Revoke all rules of this security group that reference group pairs.

        Every ingress and egress permission with a non-empty
        "UserIdGroupPairs" entry is deep-copied into a removal list; each
        non-empty list is then revoked in a single call.

        Args:
            graph: Passed through to aws_resource().

        Returns:
            Always True.
        """
        ec2 = aws_resource(self, "ec2", graph)
        security_group = ec2.SecurityGroup(self.id)

        ingress_to_revoke = []
        for permission in security_group.ip_permissions:
            if "UserIdGroupPairs" not in permission or len(permission["UserIdGroupPairs"]) == 0:
                continue
            p = copy.deepcopy(permission)
            ingress_to_revoke.append(p)
            log.debug(f"Adding incoming permission {p} of {self.kind} {self.dname} to removal list")

        egress_to_revoke = []
        for permission in security_group.ip_permissions_egress:
            if "UserIdGroupPairs" not in permission or len(permission["UserIdGroupPairs"]) == 0:
                continue
            p = copy.deepcopy(permission)
            egress_to_revoke.append(p)
            log.debug(f"Adding outgoing permission {p} of {self.kind} {self.dname} to removal list")

        if ingress_to_revoke:
            security_group.revoke_ingress(IpPermissions=ingress_to_revoke)

        if egress_to_revoke:
            security_group.revoke_egress(IpPermissions=egress_to_revoke)

        return True
Beispiel #6
0
 def pre_delete(self, graph: Graph) -> bool:
     """Disassociate the address before deletion if it has an association.

     Args:
         graph: Passed through to aws_client().

     Returns:
         Always True.
     """
     if self.association_id is None:
         log.debug(f"No association for {self.rtdname}")
         return True

     client = aws_client(self, "ec2", graph=graph)
     client.disassociate_address(AssociationId=self.association_id)
     return True
Beispiel #7
0
    def action_processor(self, message: Dict) -> None:
        """Process an incoming action message and build the reply for it.

        Only messages of kind "action" whose message type equals
        ``self.action`` are executed via ``self.do_action()``.

        Args:
            message: Incoming message; expected to be a dict with the keys
                "kind", "message_type" and "data".

        Returns:
            NOTE(review): despite the ``-> None`` annotation a reply dict is
            returned for messages of kind "action". Its "kind" is
            "action_done" on success and "action_error" if processing
            raised. For anything else None is returned.
        """
        if not isinstance(message, dict):
            log.error(f"Invalid message: {message}")
            return

        kind = message.get("kind")
        message_type = message.get("message_type")
        data = message.get("data")
        log.debug(
            f"Received message of kind {kind}, type {message_type}, data: {data}"
        )

        if kind != "action":
            return

        try:
            if message_type != self.action:
                raise ValueError(f"Unknown message type {message_type}")
            started = time.time()
            self.do_action(data)
            run_time = int(time.time() - started)
            log.debug(f"{self.action} ran for {run_time} seconds")
        except Exception as e:
            log.exception(f"Failed to {message_type}: {e}")
            reply_kind = "action_error"
        else:
            reply_kind = "action_done"

        return {
            "kind": reply_kind,
            "message_type": message_type,
            "data": data,
        }
Beispiel #8
0
    def do_action(self, data: Dict) -> None:
        """Protect every resource listed in the protector plugin config.

        The config is validated, copied to ``self.config`` and walked as
        cloud -> account -> region -> kind -> resource ids. One search term
        is built per resource; all terms are or-ed together and piped into
        ``protect``.

        Args:
            data: Not used by this method.
        """
        log.info("Protector called")
        Config.plugin_protector.validate(Config.plugin_protector)
        self.config = deepcopy(Config.plugin_protector.config)

        cg = CoreGraph(tls_data=self.tls_data)
        resource_parts = []
        for cloud_id, accounts in self.config.items():
            for account_id, regions in accounts.items():
                for region_id, kinds in regions.items():
                    for kind, resources in kinds.items():
                        for resource_id in resources:
                            log.debug(
                                f"Protecting {resource_id} of kind {kind} in"
                                f" region {region_id} account {account_id}"
                                f" cloud {cloud_id}")
                            # The account is part of the filter so that a
                            # resource with the same id, kind and region in
                            # a different account is not protected by
                            # accident.
                            resource_parts.append(
                                f'(/reported.id == "{resource_id}"'
                                f' and /reported.kind == "{kind}"'
                                f' and /ancestors.account.reported.id == "{account_id}"'
                                f' and /ancestors.region.reported.id == "{region_id}"'
                                f' and /ancestors.cloud.reported.id == "{cloud_id}")'
                            )

        if not resource_parts:
            # Without any term the command would be "search  | protect",
            # i.e. a search that is not restricted to configured resources.
            log.debug("No resources configured for protection")
            return

        resource_part = " or ".join(resource_parts)
        command = f"search {resource_part} | protect"
        for node_data in cg.execute(command):
            node = node_from_dict(node_data)
            log.debug(f"Protected {node.rtdname}")
Beispiel #9
0
 def shutdown(self, event: Event = None) -> None:
     """Set the shutdown flag and close the websocket if there is one.

     Args:
         event: Not used by this method.
     """
     log.debug(
         "Received shutdown event - shutting down resotocore task queue listener"
     )
     self.shutdown_event.set()
     if not self.ws:
         return
     self.ws.close()
Beispiel #10
0
    def do_action(self, data: Dict) -> None:
        """Mark resources that lack required tags for cleanup.

        Builds a single query from the cleanup-untagged plugin config
        ("tags", "kinds" and "accounts" with a per-account "age"), pipes it
        into ``clean`` and logs every returned node.

        Args:
            data: Not used by this method.
        """
        log.debug("Cleanup Untagged called")
        cg = CoreGraph(tls_data=self.tls_data)
        config = deepcopy(Config.plugin_cleanup_untagged.config)

        quoted_tags = '", "'.join(config["tags"])
        tags_part = f'not(has_key(tags, ["{quoted_tags}"]))'
        quoted_kinds = '", "'.join(config["kinds"])
        kinds_part = f'is(["{quoted_kinds}"])'

        # One parenthesised condition per configured cloud/account pair.
        account_parts = []
        for cloud_id, cloud_accounts in config["accounts"].items():
            for account_id, account_data in cloud_accounts.items():
                age = delta_to_str(account_data.get("age"))
                account_parts.append(
                    f'(/ancestors.cloud.id == "{cloud_id}" and '
                    f'/ancestors.account.id == "{account_id}" and '
                    f"age > {age})"
                )
        accounts_part = f"({' or '.join(account_parts)})"

        exclusion_part = (
            "/metadata.protected == false"
            " and /metadata.phantom == false"
            " and /metadata.cleaned == false"
        )
        required_tags = ", ".join(config["tags"])
        reason = f"Missing one or more of required tags {required_tags} and age more than threshold"
        command = f'query {exclusion_part} and {kinds_part} and {tags_part} and {accounts_part} | clean "{reason}"'

        for node_data in cg.execute(command):
            node = node_from_dict(node_data)
            log.debug(
                f"Marking {node.rtdname} with age {node.age} for cleanup for"
                f" missing one or more of tags: {required_tags}")
0
 def __core_metadata(
     client: ResotoClient,
 ) -> Tuple[List[CommandInfo], List[str], List[str]]:
     """Fetch command infos, kind names and property names from the core.

     Only kinds whose ``properties`` is not None contribute kind names and
     property names.

     Args:
         client: Client used to fetch the model and the CLI info.

     Returns:
         A tuple of (commands, sorted kind names, sorted property names).
         If anything raises, a warning is logged and three empty lists are
         returned instead.
     """
     try:
         log.debug("Fetching core metadata..")
         model = client.model()
         kinds_with_props = {
             name: kind
             for name, kind in model.kinds.items()
             if kind.properties is not None
         }
         known_kinds = set(kinds_with_props)
         known_props = {
             prop.name
             for kind in kinds_with_props.values()
             for prop in kind.properties
         }
         info = client.cli_info()
         cmds = []
         for cmd in info.get("commands", []):
             cmds.append(jsons.load(cmd, CommandInfo))
         return cmds, sorted(known_kinds), sorted(known_props)
     except Exception as ex:
         log.warning(
             f"Can not load metadata from core: {ex}. No suggestions as fallback.",
             exc_info=ex,
         )
         return [], [], []
Beispiel #12
0
def collect_account(
    account: AWSAccount,
    regions: List,
    args: Namespace = None,
    running_config: RunningConfig = None,
) -> Graph:
    """Collect one AWS account and return the collector's graph.

    Args:
        account: Account to collect.
        regions: Regions handed to the AWSAccountCollector.
        args: If given, installed as ``ArgumentParser.args`` and the logger
            is set up again.
        running_config: If given, applied to ``Config.running_config``.

    Returns:
        The graph of the account collector. Exceptions raised while
        collecting are logged and counted, not propagated, so the graph is
        returned in that case as well.
    """
    resotolib.proc.set_thread_name(f"aws_{account.id}")

    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker-aws")
    if running_config is not None:
        Config.running_config.apply(running_config)

    log.debug(f"Starting new collect process for account {account.dname}")

    collector = AWSAccountCollector(regions, account)
    try:
        collector.collect()
    except Exception as e:
        # Client errors carry an AWS error code that goes into the log
        # message; both kinds of failure are counted in the same metric.
        if isinstance(e, botocore.exceptions.ClientError):
            log.exception(f"An AWS {e.response['Error']['Code']} error occurred while collecting account {account.dname}")
        else:
            log.exception(f"An unhandled error occurred while collecting AWS account {account.dname}")
        metrics_unhandled_account_exceptions.labels(account=account.dname).inc()

    return collector.graph
Beispiel #13
0
def kill_children(signal: Signals = SIGTERM,
                  ensure_death: bool = False,
                  timeout: int = 3) -> None:
    """Send a signal to all child processes of the current process.

    Children are enumerated recursively. A child that has already exited by
    the time it is signalled is skipped instead of aborting the loop.

    Args:
        signal: Signal to send. SIGTERM is delivered via ``terminate()``,
            everything else via ``send_signal()``.
        ensure_death: If True, wait up to ``timeout`` for the children to
            exit and kill those that are still alive afterwards.
        timeout: Timeout passed to ``psutil.wait_procs()``.
    """
    procs = psutil.Process().children(recursive=True)
    num_children = len(procs)
    if num_children == 0:
        return
    log_suffix = "" if num_children == 1 else "ren"

    log.debug(f"Sending {signal.name} to {num_children} child{log_suffix}.")
    for p in procs:
        try:
            if signal == SIGTERM:
                p.terminate()
            else:
                p.send_signal(signal)
        except psutil.NoSuchProcess:
            # The child exited between enumeration and signalling; there is
            # nothing left to signal and the remaining children still need
            # to be handled.
            pass

    if ensure_death:
        _, alive = psutil.wait_procs(procs, timeout=timeout)
        for p in alive:
            log.debug(
                f"Child with PID {p.pid} is still alive, sending SIGKILL")
            try:
                p.kill()
            except psutil.NoSuchProcess:
                # Exited after wait_procs() returned.
                pass
Beispiel #14
0
    def connect(self) -> None:
        """Connect to the work queue websocket and run until it closes.

        The websocket URI is derived from ``self.resotocore_ws_uri`` by
        appending "/work/queue" to its path and replacing the query with the
        comma-joined tasks and task queue filter values. If
        ``ArgumentParser.args`` has a truthy ``psk``,
        ``encode_jwt_to_headers()`` is called on the request headers. If
        ``self.tls_data`` is set, its CA cert path is passed as SSL option.
        """
        uri_parts = urlsplit(self.resotocore_ws_uri)
        query_params = {"task": ",".join(self.tasks)}
        for name, values in self.task_queue_filter.items():
            query_params[name] = ",".join(values)
        ws_uri = urlunsplit((
            uri_parts.scheme,
            uri_parts.netloc,
            uri_parts.path + "/work/queue",
            urlencode(query_params),
            "",
        ))

        log.debug(f"{self.identifier} connecting to {ws_uri}")
        headers = {}
        if getattr(ArgumentParser.args, "psk", None):
            encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)

        callbacks = {
            "on_open": self.on_open,
            "on_message": self.on_message,
            "on_error": self.on_error,
            "on_close": self.on_close,
            "on_ping": self.on_ping,
            "on_pong": self.on_pong,
        }
        self.ws = websocket.WebSocketApp(ws_uri, header=headers, **callbacks)

        sslopt = {"ca_certs": self.tls_data.ca_cert_path} if self.tls_data else None
        self.ws.run_forever(
            sslopt=sslopt,
            ping_interval=30,
            ping_timeout=10,
            ping_payload="ping",
        )
Beispiel #15
0
 def add_plugin_config(self, config: Config) -> None:
     """Let every known plugin add its config to ``config``.

     Plugins are discovered first if that has not happened yet.

     Args:
         config: Config object handed to each plugin's ``add_config()``.
     """
     if not initialized:
         self.find_plugins()
     log.debug("Adding plugin config")
     # plugins maps each plugin type to the plugin classes of that type
     for plugin_type in plugins:
         for plugin_class in plugins[plugin_type]:
             plugin_class.add_config(config)
Beispiel #16
0
 def search(self, search: str, edge_type: Optional[EdgeType] = None):
     """Post a search to the search endpoint.

     Args:
         search: Search text sent as the request payload.
         edge_type: If given, its value is appended to the endpoint as the
             "edge_type" query parameter.

     Returns:
         Whatever ``self.post()`` returns.
     """
     log.debug(f"Sending search {search}")
     endpoint = self.search_uri
     if edge_type is not None:
         endpoint = endpoint + "?" + urlencode({"edge_type": edge_type.value})
     return self.post(
         endpoint,
         search,
         {"Accept": "application/x-ndjson"},
         verify=self.verify,
     )
Beispiel #17
0
 def __iter__(self):
     """Yield one encoded, newline-terminated JSON document per changed node.

     Nodes whose ``changes.changed`` is falsy are skipped.
     """
     for node in self.graph.nodes:
         if node.changes.changed:
             changes = node_to_dict(node, changes_only=True)
             line = json.dumps(changes) + "\n"
             log.debug(f"Updating node {changes}")
             yield line.encode()
Beispiel #18
0
 def regions(self) -> List:
     """Return the regions to use, resolving them on first access.

     The result is kept in ``self.__regions``. If no region is configured
     in ``Config.aws.region``, ``all_regions()`` is used.
     """
     if len(self.__regions) > 0:
         return self.__regions

     configured = Config.aws.region
     # An empty list is falsy as well, so a single truthiness check covers
     # both "not set" and "set to an empty list".
     if configured:
         self.__regions = list(configured)
     else:
         log.debug("AWS region not specified, assuming all regions")
         self.__regions = all_regions()
     return self.__regions
Beispiel #19
0
def get_configs(resotocore_uri: str = None, psk: str = None, verify: Optional[str] = None) -> List:
    """Fetch the list of configs from the core.

    Args:
        resotocore_uri: Passed through ``default_args()`` before use.
        psk: Passed through ``default_args()`` before use.
        verify: Handed to ``requests.get()`` as ``verify``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the response status code is not 200.
    """
    resotocore_uri, psk, headers = default_args(resotocore_uri, psk)

    log.debug("Getting configs")
    response = requests.get(f"{resotocore_uri}/configs", headers=headers, verify=verify)
    if response.status_code != 200:
        raise RuntimeError(f"Error getting configs: {response.content.decode('utf-8')}")
    return response.json()
Beispiel #20
0
    def clean(self, value: bool) -> None:
        """Set or unset the cleanup flag of this resource.

        Args:
            value: New value of the flag.

        Raises:
            ValueError: If the resource is a phantom resource and ``value``
                is truthy.
        """
        if self.phantom and value:
            raise ValueError(f"Can't cleanup phantom resource {self.rtdname}")

        if value:
            clean_str = ""
        else:
            clean_str = "not "
        self.log(f"Setting to {clean_str}be cleaned")
        log.debug(f"Setting {self.rtdname} to {clean_str}be cleaned")
        # Record the change before storing the new value.
        self._changes.add("clean")
        self._clean = value
Beispiel #21
0
 def add_plugin_args(self, arg_parser: ArgumentParser) -> None:
     """Let every known plugin add its args to ``arg_parser``.

     Plugins are discovered first if that has not happened yet.

     Args:
         arg_parser: Parser handed to each plugin's ``add_args()``.
     """
     if not initialized:
         self.find_plugins()
     log.debug("Adding plugin args")
     # plugins maps each plugin type to the plugin classes of that type
     for plugin_type in plugins:
         for plugin_class in plugins[plugin_type]:
             plugin_class.add_args(arg_parser)
Beispiel #22
0
def dispatch_event(event: Event, blocking: bool = False) -> None:
    """Dispatch an event to the listeners registered for its type.

    Every listener registered by the current process is called in its own
    thread. One-shot listeners are guarded by their lock and are removed
    once they have been started.

    Args:
        event: Event to dispatch; its ``event_type`` selects the listeners.
        blocking: If True, wait for all started listeners. Listeners
            registered as blocking are always waited for.
    """
    waiting_str = "" if blocking else "not "
    log.debug(
        f"Dispatching event {event.event_type.name} and {waiting_str}waiting for"
        " listeners to return")

    with _events_lock.read_access:
        # Check membership under the same lock as the copy so a listener
        # removed in between cannot invalidate the lookup.
        if event.event_type not in _events:
            return
        # Event listeners might unregister themselves during event dispatch
        # so we will work on a shallow copy while processing the current event.
        listeners = dict(_events[event.event_type])

    threads = {}
    for listener, listener_data in listeners.items():
        # Only a one-shot lock acquired here may be released here. A
        # listener skipped because of a foreign PID or a busy lock must
        # neither be removed nor have its lock released.
        lock_acquired = False
        try:
            if listener_data["pid"] != os.getpid():
                continue

            if listener_data["one-shot"]:
                lock_acquired = listener_data["lock"].acquire(blocking=False)
                if not lock_acquired:
                    log.error(f"Not calling one-shot listener {listener} of type"
                              f" {type(listener)} - can't acquire lock")
                    continue

            log.debug(f"Calling listener {listener} of type {type(listener)}"
                      f" (blocking: {listener_data['blocking']})")
            thread_name = f"{event.event_type.name.lower()}_event-{getattr(listener, '__name__', 'anonymous')}"
            t = Thread(target=listener, args=[event], name=thread_name)
            t.start()
            # Register only after a successful start; joining a thread that
            # was never started raises RuntimeError.
            if blocking or listener_data["blocking"]:
                threads[t] = listener
        except Exception:
            log.exception("Caught unhandled event callback exception")
        finally:
            if lock_acquired:
                log.debug(
                    f"One-shot specified for event {event.event_type.name} "
                    f"listener {listener} - removing event listener")
                remove_event_listener(event.event_type, listener)
                listener_data["lock"].release()

    start_time = time.time()
    for thread, listener in threads.items():
        # All listeners share one deadline measured from start_time, but
        # each join waits for at least one second.
        timeout = start_time + listeners[listener]["timeout"] - time.time()
        if timeout < 1:
            timeout = 1
        log.debug(
            f"Waiting up to {timeout:.2f}s for event listener {thread.name} to finish"
        )
        thread.join(timeout)
        log.debug(
            f"Event listener {thread.name} finished (timeout: {thread.is_alive()})"
        )
Beispiel #23
0
 def run(self) -> None:
     """Keep connecting to the event bus until shutdown is requested.

     Registers ``self.shutdown`` for the SHUTDOWN event, then calls
     ``self.connect()`` in a loop. Exceptions from connecting are logged
     and every iteration ends with a one second pause.
     """
     self.name = "eventbus-listener"
     add_event_listener(EventType.SHUTDOWN, self.shutdown)
     while True:
         if self.shutdown_event.is_set():
             break
         log.debug("Connecting to resotocore event bus")
         try:
             self.connect()
         except Exception as e:
             log.error(e)
         time.sleep(1)
Beispiel #24
0
 def pre_delete(self, graph: Graph) -> bool:
     """Delete all non-main associations of this route table.

     Args:
         graph: Passed through to aws_resource().

     Returns:
         Always True.
     """
     ec2 = aws_resource(self, "ec2", graph)
     route_table = ec2.RouteTable(self.id)
     for association in route_table.associations:
         if association.main:
             continue
         log_msg = f"Deleting route table association {association.id}"
         self.log(log_msg)
         log.debug(f"{log_msg} for cleanup of {self.kind} {self.dname}")
         association.delete()
     return True
Beispiel #25
0
 def on_config_event(self, message: Dict[str, Any]) -> None:
     """Reload the config when an update for it is announced.

     A reload happens only for "config-updated" messages whose data id
     equals ``self.config_name`` and whose revision differs from the
     running config's revision. Failures while reloading are logged.

     Args:
         message: Event message with "message_type" and "data" keys.
     """
     if message.get("message_type") != "config-updated":
         return
     data = message.get("data", {})
     if data.get("id") != self.config_name:
         return
     if data.get("revision") == Config.running_config.revision:
         return

     try:
         log.debug(f"Config {self.config_name} has changed - reloading")
         self.load_config(reload=True)
     except Exception:
         log.exception("Failed to reload config")
Beispiel #26
0
 def pre_delete(self, graph: Graph) -> bool:
     """Detach this internet gateway from every AWSVPC predecessor.

     Predecessors are looked up along delete edges.

     Args:
         graph: Graph used for the resource lookup and the predecessor search.

     Returns:
         Always True.
     """
     ec2 = aws_resource(self, "ec2", graph)
     gateway = ec2.InternetGateway(self.id)
     for vpc in self.predecessors(graph, edge_type=EdgeType.delete):
         if not isinstance(vpc, AWSVPC):
             continue
         log_msg = f"Detaching {vpc.kind} {vpc.dname}"
         self.log(log_msg)
         log.debug(f"{log_msg} for deletion of {self.kind} {self.dname}")
         gateway.detach_from_vpc(VpcId=vpc.id)
     return True
Beispiel #27
0
 def pre_delete(self, graph: Graph) -> bool:
     """Remove every AWSIAMRole predecessor from this instance profile.

     Predecessors are looked up along delete edges.

     Args:
         graph: Graph used for the resource lookup and the predecessor search.

     Returns:
         Always True.
     """
     iam = aws_resource(self, "iam", graph)
     profile = iam.InstanceProfile(self.name)
     for role in self.predecessors(graph, edge_type=EdgeType.delete):
         if not isinstance(role, AWSIAMRole):
             continue
         log_msg = f"Detaching {role.rtdname}"
         self.log(log_msg)
         log.debug(f"{log_msg} for deletion of {self.rtdname}")
         profile.remove_role(RoleName=role.name)
     return True
Beispiel #28
0
 def update_age(self) -> None:
     """Parse the configured minimum age and store it in ``self.age``.

     Raises:
         ValueError: Re-raised after logging if the configured value
             cannot be parsed.
     """
     try:
         min_age = parse_delta(
             Config.plugin_cleanup_aws_loadbalancers.min_age)
         self.age = min_age
         log.debug(f"Cleanup AWS Load balancers minimum age is {self.age}")
     except ValueError:
         error_msg = (
             "Error while parsing Cleanup AWS Load balancers minimum age"
             f" {Config.plugin_cleanup_aws_loadbalancers.min_age}"
         )
         log.error(error_msg)
         raise
Beispiel #29
0
 def add_plugin(self, plugin) -> bool:
     """Add a plugin class to the list of plugins of its type.

     Only concrete classes derived from BasePlugin or BaseActionPlugin
     whose ``plugin_type`` is a known key of ``plugins`` are added, and
     each class is added at most once.

     Args:
         plugin: Candidate object to register.

     Returns:
         Always True, whether or not the plugin was added.
     """
     global plugins
     if not inspect.isclass(plugin) or inspect.isabstract(plugin):
         return True
     if not issubclass(plugin, (BasePlugin, BaseActionPlugin)):
         return True
     if plugin.plugin_type not in plugins:
         return True

     log.debug(f"Found plugin {plugin} ({plugin.plugin_type.name})")
     known = plugins[plugin.plugin_type]
     if plugin not in known:
         known.append(plugin)
     return True
Beispiel #30
0
def remove_event_listener(event_type: EventType, listener: Callable) -> bool:
    """Unregister a listener from an event type.

    The entry of the event type itself is dropped once its last listener
    has been removed.

    Args:
        event_type: Event type the listener is registered under.
        listener: Listener to remove.

    Returns:
        True if the listener was registered and has been removed, False
        otherwise.
    """
    with _events_lock.write_access:
        if not event_listener_registered(event_type, listener):
            return False

        log.debug(f"Removing {listener} from event {event_type.name}")
        registered = _events[event_type]
        del registered[listener]
        if len(registered) == 0:
            del _events[event_type]
        return True