Example #1
    def _start_rstudio(cls,
                       lb: LabBook,
                       pr: ProxyRouter,
                       username: str,
                       container_override_id: str = None):
        lb_ip = ContainerOperations.get_labbook_ip(lb, username)
        lb_endpoint = f'http://{lb_ip}:8787'

        mitm_endpoint = MITMProxyOperations.get_mitmendpoint(lb_endpoint)
        # start mitm proxy if it doesn't exist
        if mitm_endpoint is None:

            # get a proxy prefix
            unique_key = unique_id()

            # start proxy
            mitm_endpoint = MITMProxyOperations.start_mitm_proxy(
                lb_endpoint, unique_key)

            # Ensure we start monitor when starting MITM proxy
            start_labbook_monitor(
                lb,
                username,
                "rstudio",
                # This is the endpoint for the proxy and not the rserver?
                url=f'{lb_endpoint}/{unique_key}',
                author=get_logged_in_author())

            # All messages will come through MITM, so we don't need to monitor rserver directly
            start_rserver(lb, username, tag=container_override_id)

            # add route
            rt_prefix, _ = pr.add(mitm_endpoint, f'rserver/{unique_key}/')
            # Warning: RStudio will break if there is a trailing slash!
            suffix = f'/{rt_prefix}'

        else:
            # Is there an existing route to the MITM proxy?
            matched_routes = pr.get_matching_routes(mitm_endpoint, 'rserver')

            if len(matched_routes) == 1:
                suffix = matched_routes[0]
            elif len(matched_routes) == 0:
                logger.warning(
                    'Creating missing route for existing RStudio mitmproxy_proxy'
                )
                # TODO DC: This feels redundant with already getting the mitm_endpoint above
                # Can we refactor this into a more coherent single operation? Maybe an MITMProxy instance?
                unique_key = MITMProxyOperations.get_mitmkey(lb_endpoint)
                # add route
                rt_prefix, _ = pr.add(mitm_endpoint, f'rserver/{unique_key}/')
                # Warning: RStudio will break if there is a trailing slash!
                suffix = f'/{rt_prefix}'
            else:
                raise ValueError(
                    f"Multiple RStudio proxy instances for {str(lb)}. Please restart the Project "
                    "or manually delete stale containers.")

        return suffix
Example #2
    def _stop_container(cls, lb, username):
        """Stop container and also do necessary cleanup of confhttpproxy, monitors, etc.

        This supports two cases: applications monitored by MITMProxy (currently only
        RStudio) and Jupyter. For now, if we can't find an mitmproxy endpoint, we assume
        we're dealing with a Jupyter container.
        """

        pr = confhttpproxy.ProxyRouter.get_proxy(
            lb.client_config.config['proxy'])

        # Remove route from proxy
        lb_name = ContainerOperations.labbook_image_name(lb, username)
        if MITMProxyOperations.get_mitmendpoint(lb_name):
            # there is an MITMProxy (currently only used for RStudio)
            proxy_endpoint = MITMProxyOperations.stop_mitm_proxy(lb_name)
            tool = 'rserver'
        else:
            lb_ip = ContainerOperations.get_labbook_ip(lb, username)
            # The only alternative to mitmproxy (currently) is jupyter
            # TODO in #453: Construction of this URL should be encapsulated in Jupyter Dev Tool logic
            proxy_endpoint = f'http://{lb_ip}:8888'
            tool = 'jupyter'

        est_target = pr.get_matching_routes(proxy_endpoint, tool)

        for i, target in enumerate(est_target):
            if i == 1:
                # We have > 1 entry in the router, which shouldn't happen
                logger.warning(
                    f'Removing multiple routes for {tool} on {proxy_endpoint} during Project container stop.'
                )
            pr.remove(target[1:])

        wf = LabbookWorkflow(lb)
        wf.garbagecollect()

        # Clean up empty bind mount dirs from datasets if needed
        submodules = lb.git.list_submodules()
        for submodule in submodules:
            namespace, dataset_name = submodule['name'].split("&")
            bind_location = os.path.join(lb.root_dir, 'input', dataset_name)
            if os.path.isdir(bind_location):
                os.rmdir(bind_location)

        # stop labbook monitor
        stop_labbook_monitor(lb, username)

        lb, stopped = ContainerOperations.stop_container(labbook=lb,
                                                         username=username)

        if not stopped:
            # TODO DK: Why would stopped=False? Should this move up??
            raise ValueError(f"Failed to stop labbook {lb.name}")
Example #3
    def _start_rstudio(cls, labbook: LabBook, router: ProxyRouter, username: str,
                       container_override_id: str = None):
        mitm_url, pr_suffix = MITMProxyOperations.configure_mitmroute(labbook, router, username)

        # All messages will come through MITM, so we don't need to monitor rserver directly
        start_rserver(labbook, username, tag=container_override_id)

        # Ensure monitor is running
        start_labbook_monitor(labbook, username, "rstudio",
                              # the endpoint for the NGINX proxy running inside the mitmproxy container
                              # (not the rserver) which maps `/rserver/<whatever>/<foo>` to `/<foo>`.
                              # But url isn't used currently by monitor_rserver.RServerMonitor!
                              url=mitm_url,
                              author=get_logged_in_author())

        return pr_suffix
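
Example #3 is the post-refactor version of Example #1: the endpoint lookup, proxy startup, and route bookkeeping now live in `MITMProxyOperations.configure_mitmroute`. Below is a minimal sketch of what that helper presumably encapsulates, condensed from Example #1; the returned URL form and the handling of duplicate routes are assumptions, not the actual implementation.

    @classmethod
    def configure_mitmroute(cls, labbook: LabBook, router: ProxyRouter, username: str):
        # Sketch only: reconstructed from the pre-refactor logic in Example #1
        lb_ip = ContainerOperations.get_labbook_ip(labbook, username)
        lb_endpoint = f'http://{lb_ip}:8787'

        mitm_endpoint = cls.get_mitmendpoint(lb_endpoint)
        if mitm_endpoint is None:
            # No proxy for this Project yet: start one under a fresh key and register a route
            unique_key = unique_id()
            mitm_endpoint = cls.start_mitm_proxy(lb_endpoint, unique_key)
            rt_prefix, _ = router.add(mitm_endpoint, f'rserver/{unique_key}/')
            suffix = f'/{rt_prefix}'  # no trailing slash, or RStudio breaks
        else:
            # Proxy already running: reuse its existing route, or recreate a missing one
            unique_key = cls.get_mitmkey(lb_endpoint)
            matched = router.get_matching_routes(mitm_endpoint, 'rserver')
            if matched:
                suffix = matched[0]
            else:
                rt_prefix, _ = router.add(mitm_endpoint, f'rserver/{unique_key}/')
                suffix = f'/{rt_prefix}'

        # The exact URL handed back for monitoring is an assumption here
        return f'{mitm_endpoint}/{unique_key}', suffix
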
Example #4
    def start(self, metadata: Dict[str, str], database: int = 1) -> None:
        """Method called in a periodically scheduled async worker that should check the dev env and manage Activity
        Monitor Instances as needed

        Args:
            metadata(dict): A dictionary of data to start the activity monitor
            database(int): The database ID to use

        Returns:
            None
        """
        # Get connection to the DB
        redis_conn = redis.Redis(db=database)

        logfile_path = redis_conn.hget(self.monitor_key, "logfile_path")

        # TODO RB will need to open in write mode later to sparsify parts of the file that have already been read
        # https://github.com/gigantum/gigantum-client/issues/434, also part of #453
        # Open the log file. Note that open() raises on failure rather than returning a
        # falsy value, and logfile_path is None if the redis key is missing.
        try:
            mitmlog = open(logfile_path, "rb")
        except (OSError, TypeError):
            logger.info(f"Failed to open RStudio log {logfile_path}")
            return

        try:
            while True:
                still_running = redis_conn.hget(self.monitor_key, "run")
                # Check if you should exit
                # Sometimes this runs after the key has been deleted; a missing key (None) also means shutdown.
                if not still_running or still_running.decode() == "False":
                    logger.info(
                        f"Received Activity Monitor Shutdown Message for {self.monitor_key}"
                    )
                    redis_conn.delete(self.monitor_key)
                    break

                previous_cells = len(self.cell_data)

                # Read activity and update aggregated "cell" data
                self.process_activity(mitmlog)

                # We are processing every second, then aggregating activity records when idle
                if previous_cells == len(
                        self.cell_data) and self.current_cell.is_empty():
                    # there are no new cells in the last second, and no cells are in-process
                    self.store_record()

                # Check for new records every second
                time.sleep(1)

        except Exception as e:
            logger.error(
                f"Fatal error in RStudio Server Activity Monitor: {e}")
            raise
        finally:
            # Delete the kernel monitor key so the dev env monitor will spin up a new process
            # You may lose some activity if this happens, but the next action will sweep up changes
            logger.info(f"Shutting down RStudio monitor {self.monitor_key}")
            redis_conn.delete(self.monitor_key)
            # At this point, there is no chance we'll get anything else out of unmonitored files!
            MITMProxyOperations.clean_logfiles()
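
The loop above polls the `run` field of its redis hash once per second and exits when the field is missing or set to "False". A minimal sketch of the signaling side, with a made-up key name (in practice this is driven by the dev env monitor machinery, e.g. `stop_dev_env_monitors` as seen in Example #5):

    # Hypothetical shutdown signal for the monitor loop above; the key name is made up.
    import redis

    redis_conn = redis.Redis(db=1)
    redis_conn.hset('dev_env_monitor:alice:alice:my-project:rstudio:activity_monitor',
                    'run', 'False')  # the monitor sees this on its next one-second poll and exits
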
Example #5
    def run(self, dev_env_monitor_key: str, database: int = 1) -> None:
        """Method called in a periodically scheduled async worker that should check the dev env and manage Activity
        Monitor Instances as needed

        Args:
            dev_env_monitor_key: The unique string used as the key in redis to track this DevEnvMonitor instance
            database: The redis database number for dev env monitors to use
        """
        redis_conn = redis.Redis(db=database)
        activity_monitor_key = f'{dev_env_monitor_key}:activity_monitor'

        retval = redis_conn.hget(dev_env_monitor_key, 'container_name')
        if retval:
            labbook_container_name = retval.decode()
        else:
            # This shouldn't happen, but just in case
            logger.error(
                f'No container name for DevTool Monitor {dev_env_monitor_key}, stopping'
            )
            # This should clean up everything this monitor is managing
            # labbook name is just for logging purposes, so we supply 'unknown'
            stop_dev_env_monitors(dev_env_monitor_key, redis_conn, 'unknown')
            return

        # For now, we directly query docker, this could be cleaned up in #453
        client = get_docker_client()
        try:
            dev_env_container_status = client.containers.get(
                labbook_container_name).status
        except NotFound:
            dev_env_container_status = 'not found'

        # Clean up and return labbook container names for running proxies
        running_proxy_lb_names = MITMProxyOperations.get_running_proxies()

        # As part of #453, we should re-start the proxy if the dev tool is still running
        if labbook_container_name not in running_proxy_lb_names:
            # MITM proxy isn't running anymore.
            logger.info(
                f"Detected exited RStudio proxy {labbook_container_name}. Stopping monitoring for {activity_monitor_key}"
            )
            logger.info(f"Running proxies: {running_proxy_lb_names}")
            # This should clean up everything it's managing
            stop_dev_env_monitors(dev_env_monitor_key, redis_conn,
                                  labbook_container_name)
        elif dev_env_container_status != "running":
            # RStudio container isn't running anymore. Clean up by setting run flag to `False` so worker exits
            logger.info(
                f"Detected exited RStudio Project {labbook_container_name}. Stopping monitoring for {activity_monitor_key}"
            )
            logger.info(f"Running proxies: {running_proxy_lb_names}")
            # This should clean up everything it's managing
            stop_dev_env_monitors(dev_env_monitor_key, redis_conn,
                                  labbook_container_name)
            # I don't believe we yet have a way to fit MITM proxy cleanup into the abstract dev env monitor machinery
            # Could be addressed in #453
            MITMProxyOperations.stop_mitm_proxy(labbook_container_name)
        else:
            am_running = redis_conn.hget(activity_monitor_key, 'run')
            if not am_running or am_running.decode() == 'False':
                # Get author info
                # RB: this is not populated until a labbook is started, so why is the monitor running?
                author_name = redis_conn.hget(dev_env_monitor_key,
                                              "author_name").decode()
                author_email = redis_conn.hget(dev_env_monitor_key,
                                               "author_email").decode()
                # Start new Activity Monitor
                _, user, owner, labbook_name, dev_env_name = dev_env_monitor_key.split(
                    ':')

                args = {
                    "module_name": "gtmcore.activity.monitors.monitor_rserver",
                    "class_name": "RStudioServerMonitor",
                    "user": user,
                    "owner": owner,
                    "labbook_name": labbook_name,
                    "monitor_key": activity_monitor_key,
                    "author_name": author_name,
                    "author_email": author_email,
                    "session_metadata": None
                }

                d = Dispatcher()
                process_id = d.dispatch_task(
                    jobs.start_and_run_activity_monitor,
                    kwargs=args,
                    persist=True)
                logger.info(
                    f"Started RStudio Server Notebook Activity Monitor: Process {process_id}"
                )

                # Update redis
                redis_conn.hset(activity_monitor_key, "process_id", process_id)
                redis_conn.hset(activity_monitor_key, "run", True)
                redis_conn.hset(
                    activity_monitor_key, "logfile_path",
                    MITMProxyOperations.get_mitmlogfile_path(
                        labbook_container_name))
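
Taken together, Examples #4 and #5 imply a small redis schema: the colon-delimited dev env monitor key holds container and author info, and a derived `:activity_monitor` hash holds the activity monitor's state. A sketch of that layout; the field names come from the code above, while the concrete key and its prefix are made up for illustration:

    # <prefix>:<user>:<owner>:<labbook_name>:<dev_env_name>
    dev_env_monitor_key = 'dev_env_monitor:alice:alice:my-project:rstudio'
    # fields: container_name, author_name, author_email

    activity_monitor_key = f'{dev_env_monitor_key}:activity_monitor'
    # fields: run, process_id, logfile_path
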