def _start_rstudio(cls, lb: LabBook, pr: ProxyRouter, username: str,
                   container_override_id: str = None):
    lb_ip = ContainerOperations.get_labbook_ip(lb, username)
    lb_endpoint = f'http://{lb_ip}:8787'

    mitm_endpoint = MITMProxyOperations.get_mitmendpoint(lb_endpoint)
    # start mitm proxy if it doesn't exist
    if mitm_endpoint is None:
        # get a proxy prefix
        unique_key = unique_id()
        # start proxy
        mitm_endpoint = MITMProxyOperations.start_mitm_proxy(lb_endpoint, unique_key)
        # Ensure we start the monitor when starting the MITM proxy
        start_labbook_monitor(lb, username, "rstudio",
                              # This is the endpoint for the proxy, not the rserver itself
                              url=f'{lb_endpoint}/{unique_key}',
                              author=get_logged_in_author())
        # All messages will come through MITM, so we don't need to monitor rserver directly
        start_rserver(lb, username, tag=container_override_id)
        # add route
        rt_prefix, _ = pr.add(mitm_endpoint, f'rserver/{unique_key}/')
        # Warning: RStudio will break if there is a trailing slash!
        suffix = f'/{rt_prefix}'
    else:
        # is there an existing route to the MITM proxy or not?
        matched_routes = pr.get_matching_routes(mitm_endpoint, 'rserver')
        if len(matched_routes) == 1:
            suffix = matched_routes[0]
        elif len(matched_routes) == 0:
            logger.warning('Creating missing route for existing RStudio mitmproxy')
            # TODO DC: This feels redundant with already getting the mitm_endpoint above.
            # Can we refactor this into a more coherent single operation? Maybe an MITMProxy instance?
            unique_key = MITMProxyOperations.get_mitmkey(lb_endpoint)
            # add route
            rt_prefix, _ = pr.add(mitm_endpoint, f'rserver/{unique_key}/')
            # Warning: RStudio will break if there is a trailing slash!
            suffix = f'/{rt_prefix}'
        else:
            raise ValueError(f"Multiple RStudio proxy instances for {str(lb)}. Please restart the Project "
                             "or manually delete stale containers.")

    return suffix
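# The route bookkeeping above is easy to get wrong, so here is a minimal,
# self-contained sketch with hypothetical values, assuming pr.add() returns the
# registered prefix without a leading or trailing slash:
mitm_endpoint = 'http://172.17.0.5:8079'   # hypothetical MITM proxy address
rt_prefix = 'rserver/f3a9c2d41b6e'         # hypothetical prefix returned by pr.add()
suffix = f'/{rt_prefix}'                   # '/rserver/f3a9c2d41b6e'
# RStudio breaks on a trailing slash, so the suffix must end with the bare key
assert not suffix.endswith('/')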
def _stop_container(cls, lb, username):
    """Stop a container and do the necessary cleanup of confhttpproxy, monitors, etc.

    Currently, this supports two cases: applications monitored by MITMProxy, and Jupyter.
    So, for now, if we can't find an mitmproxy endpoint, we assume we're dealing with a
    Jupyter container.
    """
    pr = confhttpproxy.ProxyRouter.get_proxy(lb.client_config.config['proxy'])

    # Remove the route from the proxy
    lb_name = ContainerOperations.labbook_image_name(lb, username)
    if MITMProxyOperations.get_mitmendpoint(lb_name):
        # There is an MITMProxy (currently only used for RStudio)
        proxy_endpoint = MITMProxyOperations.stop_mitm_proxy(lb_name)
        tool = 'rserver'
    else:
        lb_ip = ContainerOperations.get_labbook_ip(lb, username)
        # The only alternative to mitmproxy (currently) is Jupyter
        # TODO in #453: Construction of this URL should be encapsulated in Jupyter Dev Tool logic
        proxy_endpoint = f'http://{lb_ip}:8888'
        tool = 'jupyter'

    est_target = pr.get_matching_routes(proxy_endpoint, tool)
    for i, target in enumerate(est_target):
        if i == 1:
            # We have > 1 entry in the router, which shouldn't happen
            logger.warning(f'Removing multiple routes for {tool} on {proxy_endpoint} '
                           f'during Project container stop.')
        pr.remove(target[1:])

    wf = LabbookWorkflow(lb)
    wf.garbagecollect()

    # Clean up empty bind mount dirs from datasets if needed
    submodules = lb.git.list_submodules()
    for submodule in submodules:
        namespace, dataset_name = submodule['name'].split("&")
        bind_location = os.path.join(lb.root_dir, 'input', dataset_name)
        if os.path.isdir(bind_location):
            os.rmdir(bind_location)

    # Stop the labbook monitor
    stop_labbook_monitor(lb, username)

    lb, stopped = ContainerOperations.stop_container(labbook=lb, username=username)

    if not stopped:
        # TODO DK: Why would stopped be False? Should this move up?
        raise ValueError(f"Failed to stop labbook {lb.name}")
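# Hypothetical illustration of the target[1:] slice above: judging by the code,
# get_matching_routes() returns route targets with a leading slash, while
# pr.remove() expects the bare prefix. The values below are made up.
est_target = ['/rserver/f3a9c2d41b6e']
for target in est_target:
    bare_prefix = target[1:]                # 'rserver/f3a9c2d41b6e'
    assert not bare_prefix.startswith('/')  # the form pr.remove() expects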
def _start_rstudio(cls, labbook: LabBook, router: ProxyRouter, username: str,
                   container_override_id: str = None):
    mitm_url, pr_suffix = MITMProxyOperations.configure_mitmroute(labbook, router, username)

    # All messages will come through MITM, so we don't need to monitor rserver directly
    start_rserver(labbook, username, tag=container_override_id)

    # Ensure the monitor is running
    start_labbook_monitor(labbook, username, "rstudio",
                          # This is the endpoint for the NGINX proxy running inside the
                          # mitmproxy container (not the rserver), which maps
                          # `/rserver/<whatever>/<foo>` to `/<foo>`.
                          # Note: url isn't currently used by monitor_rserver.RServerMonitor!
                          url=mitm_url,
                          author=get_logged_in_author())

    return pr_suffix
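# A minimal sketch (an assumption, not the actual implementation) of what
# MITMProxyOperations.configure_mitmroute consolidates, reconstructed from the
# inline logic of the older _start_rstudio above. It reuses the same helpers
# (ContainerOperations, unique_id, router.add) and omits the existing-route
# reuse and the multiple-route error handling for brevity.
def configure_mitmroute(labbook: LabBook, router: ProxyRouter, username: str):
    lb_ip = ContainerOperations.get_labbook_ip(labbook, username)
    lb_endpoint = f'http://{lb_ip}:8787'

    mitm_endpoint = MITMProxyOperations.get_mitmendpoint(lb_endpoint)
    if mitm_endpoint is None:
        # No proxy yet: mint a key and start one
        unique_key = unique_id()
        mitm_endpoint = MITMProxyOperations.start_mitm_proxy(lb_endpoint, unique_key)
    else:
        # The proxy is already running: recover its key
        unique_key = MITMProxyOperations.get_mitmkey(lb_endpoint)

    rt_prefix, _ = router.add(mitm_endpoint, f'rserver/{unique_key}/')
    # Warning: RStudio will break if there is a trailing slash!
    return f'{lb_endpoint}/{unique_key}', f'/{rt_prefix}'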
def start(self, metadata: Dict[str, str], database: int = 1) -> None:
    """Run the RStudio activity monitor loop: tail the MITM proxy log, aggregate
    activity into records, and exit once the redis "run" flag is cleared or deleted.

    Args:
        metadata(dict): A dictionary of data to start the activity monitor
        database(int): The database ID to use

    Returns:
        None
    """
    # Get a connection to the DB
    redis_conn = redis.Redis(db=database)

    logfile_path = redis_conn.hget(self.monitor_key, "logfile_path")
    # TODO RB: will need to open in write mode later to sparsify parts of the file
    # that have already been read.
    # https://github.com/gigantum/gigantum-client/issues/434, also part of #453

    # Open the log file. Note: open() raises on failure rather than returning a
    # falsy value, and logfile_path may be None if the redis key is missing.
    try:
        mitmlog = open(logfile_path, "rb")
    except (OSError, TypeError):
        logger.info(f"Failed to open RStudio log {logfile_path}")
        return

    try:
        while True:
            still_running = redis_conn.hget(self.monitor_key, "run")
            # Check if we should exit -- sometimes this runs after the key has been
            # deleted, so None means shutdown too
            if not still_running or still_running.decode() == "False":
                logger.info(f"Received Activity Monitor Shutdown Message for {self.monitor_key}")
                redis_conn.delete(self.monitor_key)
                break

            previous_cells = len(self.cell_data)

            # Read activity and update aggregated "cell" data
            self.process_activity(mitmlog)

            # We process every second, then aggregate activity records when idle
            if previous_cells == len(self.cell_data) and self.current_cell.is_empty():
                # There are no new cells in the last second, and no cells are in process
                self.store_record()

            # Check for new records every second
            time.sleep(1)
    except Exception as e:
        logger.error(f"Fatal error in RStudio Server Activity Monitor: {e}")
        raise
    finally:
        # Delete the kernel monitor key so the dev env monitor will spin up a new process.
        # You may lose some activity if this happens, but the next action will sweep up changes.
        logger.info(f"Shutting down RStudio monitor {self.monitor_key}")
        redis_conn.delete(self.monitor_key)
        # At this point, there is no chance we'll get anything else out of unmonitored files!
        MITMProxyOperations.clean_logfiles()
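# Hedged example of the shutdown protocol the loop above relies on: a controller
# clears the "run" flag (or deletes the key entirely -- None also means shutdown),
# and the monitor exits on its next one-second poll. The key below is hypothetical.
import redis

conn = redis.Redis(db=1)
monitor_key = 'dev_env_monitor:alice:alice:my-project:rstudio:activity_monitor'
conn.hset(monitor_key, 'run', 'False')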
def run(self, dev_env_monitor_key: str, database: int = 1) -> None:
    """Method called in a periodically scheduled async worker that should check
    the dev env and manage Activity Monitor instances as needed

    Args:
        dev_env_monitor_key: The unique string used as the key in redis to track
            this DevEnvMonitor instance
        database: The redis database number for dev env monitors to use
    """
    redis_conn = redis.Redis(db=database)
    activity_monitor_key = f'{dev_env_monitor_key}:activity_monitor'

    retval = redis_conn.hget(dev_env_monitor_key, 'container_name')
    if retval:
        labbook_container_name = retval.decode()
    else:
        # This shouldn't happen, but just in case
        logger.error(f'No container name for DevTool Monitor {dev_env_monitor_key}, stopping')
        # This should clean up everything this monitor is managing;
        # the labbook name is just for logging purposes, so we supply 'unknown'
        stop_dev_env_monitors(dev_env_monitor_key, redis_conn, 'unknown')
        return

    # For now, we query docker directly; this could be cleaned up in #453
    client = get_docker_client()
    try:
        dev_env_container_status = client.containers.get(labbook_container_name).status
    except NotFound:
        dev_env_container_status = 'not found'

    # Clean up and return labbook container names for running proxies
    running_proxy_lb_names = MITMProxyOperations.get_running_proxies()

    # As part of #453, we should re-start the proxy if the dev tool is still running
    if labbook_container_name not in running_proxy_lb_names:
        # The MITM proxy isn't running anymore
        logger.info(f"Detected exited RStudio proxy {labbook_container_name}. "
                    f"Stopping monitoring for {activity_monitor_key}")
        logger.info(f"Running proxies: {running_proxy_lb_names}")
        # This should clean up everything it's managing
        stop_dev_env_monitors(dev_env_monitor_key, redis_conn, labbook_container_name)
    elif dev_env_container_status != "running":
        # The RStudio container isn't running anymore. Clean up by setting the run
        # flag to `False` so the worker exits
        logger.info(f"Detected exited RStudio Project {labbook_container_name}. "
                    f"Stopping monitoring for {activity_monitor_key}")
        logger.info(f"Running proxies: {running_proxy_lb_names}")
        # This should clean up everything it's managing
        stop_dev_env_monitors(dev_env_monitor_key, redis_conn, labbook_container_name)
        # We don't yet have a way to fit MITM proxy cleanup into the abstract
        # dev env monitor machinery; could be addressed in #453
        MITMProxyOperations.stop_mitm_proxy(labbook_container_name)
    else:
        am_running = redis_conn.hget(activity_monitor_key, 'run')
        if not am_running or am_running.decode() == 'False':
            # Get author info
            # RB: this is not populated until a labbook is started -- why "running"?
            author_name = redis_conn.hget(dev_env_monitor_key, "author_name").decode()
            author_email = redis_conn.hget(dev_env_monitor_key, "author_email").decode()

            # Start a new Activity Monitor
            _, user, owner, labbook_name, dev_env_name = dev_env_monitor_key.split(':')

            args = {"module_name": "gtmcore.activity.monitors.monitor_rserver",
                    "class_name": "RStudioServerMonitor",
                    "user": user,
                    "owner": owner,
                    "labbook_name": labbook_name,
                    "monitor_key": activity_monitor_key,
                    "author_name": author_name,
                    "author_email": author_email,
                    "session_metadata": None}

            d = Dispatcher()
            process_id = d.dispatch_task(jobs.start_and_run_activity_monitor,
                                         kwargs=args, persist=True)
            logger.info(f"Started RStudio Server Notebook Activity Monitor: Process {process_id}")

            # Update redis. Store "run" as a string, matching the decode() == "False"
            # check in the activity monitor loop
            redis_conn.hset(activity_monitor_key, "process_id", process_id)
            redis_conn.hset(activity_monitor_key, "run", "True")
            redis_conn.hset(activity_monitor_key, "logfile_path",
                            MITMProxyOperations.get_mitmlogfile_path(labbook_container_name))
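# Hypothetical example of the key layout assumed by the split(':') above; the
# exact prefix and field values are made up for illustration:
dev_env_monitor_key = 'dev_env_monitor:alice:alice:my-project:rstudio'
_, user, owner, labbook_name, dev_env_name = dev_env_monitor_key.split(':')
# user == 'alice', owner == 'alice', labbook_name == 'my-project', dev_env_name == 'rstudio'
activity_monitor_key = f'{dev_env_monitor_key}:activity_monitor'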