Example 1
    def _load_gigantum_data(self) -> None:
        """Method to load the labbook YAML file to a dictionary

        Returns:
            None
        """
        if not self.root_dir:
            raise GigantumException("No root directory assigned to lab book. "
                                    "Failed to get root directory.")

        schema_path = os.path.join(self.root_dir, '.gigantum', 'project.yaml')
        old_schema_path = os.path.join(self.root_dir, ".gigantum", "labbook.yaml")

        if os.path.exists(schema_path):
            with open(schema_path, 'rt') as lbfile:
                d = yaml.safe_load(lbfile)
            self._data = d
        elif os.path.exists(old_schema_path):
            # For backward compatibility
            with open(old_schema_path, 'rt') as lbfile:
                d = yaml.safe_load(lbfile)
            # "Virtualize" old schemas into new schemas to support back-compatability
            self._data = translate_schema(d, self.root_dir)
        else:
            if 'gm.workspace' in self.get_branches()['local']:
                logger.warning("Master branch empty, attempting to load gm.workspace")
                self.checkout_branch('gm.workspace')
                self._load_gigantum_data()
            else:
                raise GigantumException('Cannot find configuration yaml file')

        if self.schema == 2:
            # Make sure untracked directory exists (it and its contents are ignored)
            os.makedirs(os.path.join(self.root_dir, 'output', 'untracked'), exist_ok=True)
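A minimal, self-contained sketch of the same "current file name, else legacy file name" loading pattern follows; the function name load_project_yaml and the bare FileNotFoundError are illustrative choices, not part of the Gigantum API.

import os
import yaml

def load_project_yaml(root_dir: str) -> dict:
    """Load .gigantum/project.yaml, falling back to the legacy labbook.yaml name."""
    for name in ('project.yaml', 'labbook.yaml'):
        path = os.path.join(root_dir, '.gigantum', name)
        if os.path.exists(path):
            with open(path, 'rt') as f:
                return yaml.safe_load(f)
    raise FileNotFoundError(f'No configuration yaml found under {root_dir}')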
Example 2
    def checkout_branch(self, branch_name: str, new: bool = False) -> None:
        """
        Check out a Git branch, optionally creating a new local branch.

        Args:
            branch_name(str): Name of branch to checkout or create
            new(bool): Indicates this branch should be created.

        Returns:
            None
        """
        if not self.is_repo_clean:
            raise GigantumException(f"Cannot checkout {branch_name}: Untracked and/or uncommitted changes")

        try:
            if new:
                logger.info(f"Creating a new branch {branch_name}...")
                self.git.create_branch(branch_name)
            logger.info(f"Checking out branch {branch_name}...")
            self.git.checkout(branch_name=branch_name)

            # Clear out checkout context
            if self._root_dir and os.path.exists(os.path.join(self._root_dir, ".gigantum", ".checkout")):
                os.remove(os.path.join(self._root_dir, ".gigantum", ".checkout"))
            self._checkout_id = None
        except ValueError as e:
            logger.error(f"Cannot checkout branch {branch_name}: {e}")
            raise GigantumException(e)
Example 3
def _clone(remote_url: str, working_dir: str) -> str:
    """Clone `remote_url` into `working_dir` and return the path of the cloned repository."""
    clone_tokens = f"git clone {remote_url}".split()
    call_subprocess(clone_tokens, cwd=working_dir)

    # Confirm that the clone produced exactly one directory
    dirs = os.listdir(working_dir)
    if len(dirs) != 1:
        raise GigantumException('Git clone produced extra directories')

    p = os.path.join(working_dir, dirs[0])
    if not os.path.exists(p):
        raise GigantumException(
            'Could not find expected path of repo after clone')

    try:
        # This is for backward compatibility -- old projects clone to branch
        # "gm.workspace" by default, even if they have already been migrated.
        # Checking out master puts the user on the proper branch if the project
        # has been migrated, and has no effect if it hasn't.
        r = call_subprocess("git checkout master".split(), cwd=p)
    except Exception as e:
        logger.error(e)

    return p
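A hedged usage sketch for _clone: clone into a throwaway working directory and receive the path of the single directory Git created. The repository URL is a placeholder.

import tempfile

with tempfile.TemporaryDirectory() as working_dir:
    # Placeholder URL; any reachable Git remote would do.
    repo_path = _clone('https://example.com/user/project.git', working_dir)
    print(f'Cloned into {repo_path}')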
Example 4
    def _start_dev_tool(cls, labbook: LabBook, username: str, dev_tool: str, container_override_id: str = None):
        """Start the requested dev tool or bundled app and return the proxied path to it."""
        router = ProxyRouter.get_proxy(labbook.client_config.config['proxy'])
        bam = BundledAppManager(labbook)
        bundled_apps = bam.get_bundled_apps()
        bundled_app_names = list(bundled_apps)

        if dev_tool == "rstudio":
            suffix = cls._start_rstudio(labbook, router, username)
        elif dev_tool in ["jupyterlab", "notebook"]:
            # Note that starting the dev tool is identical whether we're targeting jupyterlab or notebook
            suffix = cls._start_jupyter_tool(labbook, router, username, container_override_id)
        elif dev_tool in bundled_app_names:
            app_data = bundled_apps[dev_tool]
            app_data['name'] = dev_tool
            suffix = cls._start_bundled_app(labbook, router, username, app_data, container_override_id)
        else:
            raise GigantumException(f"'{dev_tool}' not currently supported as a Dev Tool")

        # Don't include the port in the path if running on 80
        apparent_proxy_port = labbook.client_config.config['proxy']["apparent_proxy_port"]
        if apparent_proxy_port == 80:
            path = suffix
        else:
            path = f':{apparent_proxy_port}{suffix}'

        return path
Example 5
def start_rserver(labbook: LabBook,
                  username: str,
                  tag: Optional[str] = None,
                  check_reachable: bool = True) -> None:
    """ Main entrypoint to launch rstudio-server. Note, the caller must
        determine for themselves the host and port.

        Raises an exception if there's a problem.

    Returns:
        None
    """
    owner = InventoryManager().query_owner(labbook)
    lb_key = tag or infer_docker_image_name(
        labbook_name=labbook.name, owner=owner, username=username)
    docker_client = get_docker_client()
    lb_container = docker_client.containers.get(lb_key)
    if lb_container.status != 'running':
        raise GigantumException(f"{str(labbook)} container is not running")

    rserver_ps = ps_search(lb_container, 'rserver')

    if len(rserver_ps) == 1:
        # we have an existing rstudio-server instance
        return
    elif len(rserver_ps) == 0:
        _start_rserver_process(lb_container)
    else:
        # If "ps aux" for rserver returns multiple hits - this should never happen.
        for n, l in enumerate(rserver_ps):
            logger.error(
                f'Multiple RStudio-Server instances - ({n+1} of {len(rserver_ps)}) - {l}'
            )
        raise ValueError(
            f'Multiple ({len(rserver_ps)}) RStudio Server instances detected')
Example 6
    def _start_dev_tool(cls,
                        lb: LabBook,
                        username: str,
                        dev_tool: str,
                        container_override_id: str = None):
        pr = ProxyRouter.get_proxy(lb.client_config.config['proxy'])

        if dev_tool == "rstudio":
            suffix = cls._start_rstudio(lb, pr, username)
        elif dev_tool in ["jupyterlab", "notebook"]:
            # Note that starting the dev tool is identical whether we're targeting jupyterlab or notebook
            suffix = cls._start_jupyter_tool(lb, pr, username,
                                             container_override_id)
        else:
            raise GigantumException(
                f"'{dev_tool}' not currently supported as a Dev Tool")

        # Don't include the port in the path if running on 80
        apparent_proxy_port = lb.client_config.config['proxy'][
            "apparent_proxy_port"]
        if apparent_proxy_port == 80:
            path = suffix
        else:
            path = f':{apparent_proxy_port}{suffix}'

        return path
Example 7
def start_bundled_app(labbook: LabBook, username: str, command: str, tag: Optional[str] = None) -> None:
    """ Method to start a bundled app by running the user specified command inside the running Project container

    Args:
        labbook: labbook instance
        username: current logged in user
        command: user specified command to run
        tag: optional tag for the container override id

    Returns:
        None
    """
    if len(command) == 0:
        return

    owner = InventoryManager().query_owner(labbook)
    lb_key = tag or infer_docker_image_name(labbook_name=labbook.name,
                                            owner=owner,
                                            username=username)
    docker_client = get_docker_client()
    lb_container = docker_client.containers.get(lb_key)
    if lb_container.status != 'running':
        raise GigantumException(f"{str(labbook)} container is not running. Start it before starting a bundled app.")

    lb_container.exec_run(f'sh -c "{command}"', detach=True, user='******')
Example 8
    def start_mitm_proxy(cls, lb_endpoint: str, key: str) -> str:
        """Launch a proxy cointainer between client and labbook.

        Args:
            lb_endpoint: the specific target running a dev tool
            key: a unique key for this instance (related to the monitored Project container - e.g., RStudio)

        Returns:
            str that contains the proxy endpoint as http://{ip}:{port}
        """

        # setup the environment - note that UID is obtained inside the container based on labmanager_share_vol
        # (mounted at /mnt/share)
        env_var = [f"LBENDPOINT={lb_endpoint}", f"PROXYID={key}"]
        nametag = f"gmitmproxy.{key}"
        volumes_dict = {
            'labmanager_share_vol': {'bind': '/mnt/share', 'mode': 'rw'}
        }

        docker_client = get_docker_client()

        container = docker_client.containers.run("gigantum/mitmproxy_proxy:" + CURRENT_MITMPROXY_TAG, detach=True,
                                                 init=True, name=nametag, volumes=volumes_dict,
                                                 environment=env_var)

        # For now, we hammer repeatedly for 5 seconds
        # Plan for a better solution is mentioned in #434
        for _ in range(50):
            time.sleep(.1)
            # Hope that our container is actually up and reload
            container.reload()
            container_ip = container.attrs['NetworkSettings']['Networks']['bridge']['IPAddress']
            if container_ip:
                break

        if not container_ip:
            raise GigantumException("Unable to get mitmproxy_proxy IP address.")

        mitm_endpoint = f'http://{container_ip}:8079'

        # register the proxy in KV store
        redis_conn = redis.Redis(db=1)
        redis_conn.set(f"{lb_endpoint}-mitm-endpoint", mitm_endpoint)
        redis_conn.set(f"{lb_endpoint}-mitm-container_id", container.id)
        redis_conn.set(f"{lb_endpoint}-mitm-key", key)

        # make sure proxy is up.
        for timeout in range(10):
            time.sleep(1)
            ec, new_ps_list = container.exec_run(
                f'sh -c "ps aux | grep nginx | grep -v \' grep \'"')
            new_ps_list = new_ps_list.decode().split('\n')
            if any('nginx' in l for l in new_ps_list):
                logger.info(f"Proxy to rserver started within {timeout + 1} seconds")
                break
        else:
            raise ValueError('mitmproxy failed to start after 10 seconds')

        return mitm_endpoint
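A sketch of how another process could later look up the proxy registered above. The key names match the redis_conn.set calls in the snippet; the lb_endpoint value is a placeholder.

import redis

lb_endpoint = '172.17.0.5:8787'  # placeholder for illustration
redis_conn = redis.Redis(db=1, decode_responses=True)
mitm_endpoint = redis_conn.get(f"{lb_endpoint}-mitm-endpoint")
container_id = redis_conn.get(f"{lb_endpoint}-mitm-container_id")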
Example 9
    def modify_tag_visibility(self, tag: str, show: str):
        """Modify all detail objecvts with matching tag to have visibility specified in show"""
        if not self._in_modify:
            raise GigantumException(
                "Attempt to use ActivityRecord.modify_tag_visibility() outside of "
                "ActivityRecord.inspect_detail_objects()")

        # We'll actually do the modifications in one pass when we exit the with-context
        self._tags_to_update[tag] = show
Example 10
    def remote(self) -> Optional[str]:
        """Return the URL of the first configured remote, or None if no remote is set."""
        try:
            r = self.git.list_remotes()
            if r:
                return r[0]['url']
            else:
                return None
        except Exception as e:
            logger.exception(e)
            raise GigantumException(e)
Example 11
    def remove_remote(self, remote_name: Optional[str] = "origin") -> None:
        """Remove a remove from the git config

        Args:
            remote_name: Optional name of remote (default "origin")
        """
        try:
            logger.info(f"Removing remote {remote_name} from {str(self)}")
            self.git.remove_remote(remote_name)
        except Exception as e:
            raise GigantumException(e)
Example 12
    def has_remote(self):
        """Return True if the Repository has a remote that it can push/pull to/from

        Returns:
            bool indicating whether a remote is set.
        """
        try:
            return len(self.git.list_remotes()) > 0
        except Exception as e:
            logger.exception(e)
            raise GigantumException(e)
Example 13
    def add_remote(self, remote_name: str, url: str) -> None:
        """Add a new git remote

        Args:
            remote_name: Name of remote, e.g., "origin"
            url: Path to remote Git repository.
        """

        try:
            logger.info(f"Adding new remote {remote_name} at {url}")
            self.git.add_remote(remote_name, url)
            self.git.fetch(remote=remote_name)
        except Exception as e:
            raise GigantumException(e)
Example 14
def get_storage_backend(storage_type: str) -> Union[ManagedStorageBackend, UnmanagedStorageBackend]:
    """

    Args:
        storage_type(str): Identifier to load class

    Returns:
        gtmcore.dataset.storage.backend.StorageBackend
    """
    if storage_type in SUPPORTED_STORAGE_BACKENDS.keys():
        module, package = SUPPORTED_STORAGE_BACKENDS.get(storage_type)  # type: ignore
        imported = importlib.import_module(module, package)
        class_instance = getattr(imported, package)
        return class_instance()
    else:
        raise GigantumException(f"Unsupported Dataset Storage Type: {storage_type}")
Example 15
    def get_branches(self) -> Dict[str, List[str]]:
        """Return all branches a Dict of Lists. Dict contains two keys "local" and "remote".

        Args:
            None

        Returns:
            Dictionary of lists for "remote" and "local" branches.
        """

        try:
            # Note - do NOT fetch here - fetch should be done before this is called.
            return self.git.list_branches()
        except Exception as e:
            # Unsure what specific exception list_branches raises, so use a catch-all.
            logger.exception(e)
            raise GigantumException(e)
Example 16
    def update_detail_object(self, obj: ActivityDetailRecord,
                             index: int) -> None:
        """Method to update a detail object in place

        Can only be used while in the context of self.inspect_detail_objects

        Args:
            obj: detail record to store at the given index
            index: index to update
        """
        if not self._in_modify:
            raise GigantumException(
                "Attempt to use ActivityRecord.update_detail_object() outside of "
                "ActivityRecord.inspect_detail_objects()")
        if index < 0 or index >= len(self._detail_objects):
            raise ValueError("Index out of range when updating detail object")

        self._detail_objects[index] = (obj.show, obj.type.value,
                                       obj.importance, obj)
Example 17
def check_jupyter_reachable(ip_address: str, port: int, prefix: str):
    for n in range(20):
        test_url = f'http://{ip_address}:{port}{prefix}/api'
        logger.debug(
            f"Attempt {n + 1}: Testing if JupyerLab is up at {test_url}...")
        try:
            r = requests.get(test_url, timeout=0.5)
            if r.status_code != 200:
                time.sleep(0.5)
            else:
                if "version" in r.json():
                    logger.info(
                        f'Found JupyterLab up at {test_url} after {n/2.0} seconds'
                    )
                    break
                else:
                    time.sleep(0.5)
        except requests.exceptions.ConnectionError:
            # Assume API isn't up at all yet, so no connection can be made
            time.sleep(0.5)
    else:
        raise GigantumException(
            f'Could not reach JupyterLab at {test_url} after timeout')
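The polling above relies on Python's for/else construct: the else branch runs only if the loop finishes without hitting break, which makes it a natural place to raise a timeout error. A small, dependency-free sketch of the same retry idiom:

import time

def wait_until(predicate, attempts: int = 20, delay: float = 0.5) -> None:
    """Poll `predicate` until it returns True, or raise after `attempts` tries."""
    for _ in range(attempts):
        if predicate():
            break
        time.sleep(delay)
    else:
        raise TimeoutError(f"Condition not met after {attempts * delay:.1f} seconds")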
Example 18
def get_cache_manager_class(config: Configuration) -> Callable:
    """

    Args:
        config(Configuration): Configuration for the client

    Returns:
        gtmcore.dataset.cache.CacheManager
    """
    dataset_config = config.config.get('datasets')
    if not dataset_config:
        # Fallback to default host manager
        manager_str = 'host'
    else:
        manager_str = dataset_config.get('cache_manager')

    if manager_str in SUPPORTED_CACHE_MANAGERS.keys():
        module, package = SUPPORTED_CACHE_MANAGERS.get(manager_str)  # type: ignore
        imported = importlib.import_module(module, package)
        class_instance = getattr(imported, package)
        return class_instance
    else:
        raise GigantumException(f"Unsupported Dataset File Cache Manager: {manager_str}")
Example 19
    def start_mitm_proxy(cls, devtool_endpoint: str, target_key: str) -> str:
        """Launch a proxy cointainer between client and labbook.

        Args:
            devtool_endpoint: the specific target running a dev tool
            target_key: a unique key for this instance (related to the monitored Project container - e.g., RStudio)

        Returns:
            str that contains the proxy endpoint as http://{ip}:{port}
        """
        hkey = cls.get_mitm_redis_key(target_key)

        # setup the environment - note that UID is obtained inside the container based on labmanager_share_vol
        # (mounted at /mnt/share)
        logfile_path = f'/mnt/share/{cls.logfile_dir}/{target_key}.rserver.dump'

        env_var = [
            f"LBENDPOINT={devtool_endpoint}", f"LOGFILE_NAME={logfile_path}"
        ]
        nametag = f"gmitmproxy.{target_key}"
        volumes_dict = {
            'labmanager_share_vol': {
                'bind': '/mnt/share',
                'mode': 'rw'
            }
        }

        docker_client = get_docker_client()

        container = docker_client.containers.run("gigantum/mitmproxy_proxy:" +
                                                 CURRENT_MITMPROXY_TAG,
                                                 detach=True,
                                                 init=True,
                                                 name=nametag,
                                                 volumes=volumes_dict,
                                                 environment=env_var)

        # We hammer repeatedly for 5 seconds (this should be very fast since it's a small, simple container)
        for _ in range(10):
            time.sleep(.5)
            # Hope that our container is actually up and reload
            container.reload()
            mitm_ip = container.attrs['NetworkSettings']['Networks']['bridge'][
                'IPAddress']
            if mitm_ip:
                break

        if not mitm_ip:
            raise GigantumException(
                "Unable to get mitmproxy_proxy IP address.")

        # This is the port for NGINX
        mitm_endpoint = f'http://{mitm_ip}:8079'

        # register the proxy in KV store
        redis_conn = redis.Redis(db=1)
        redis_conn.hset(hkey, "endpoint", mitm_endpoint)
        redis_conn.hset(hkey, "container_id", container.id)
        redis_conn.hset(hkey, "logfile_path", logfile_path)
        redis_conn.hset(hkey, "devtool_endpoint", devtool_endpoint)

        # make sure proxy is up.
        for timeout in range(10):
            time.sleep(1)
            if ps_search(container, 'nginx'):
                logger.info(
                    f"Proxy to rserver started within {timeout + 1} seconds")
                break
        else:
            raise ValueError('mitmproxy failed to start after 10 seconds')

        return mitm_endpoint
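Since this variant stores everything in a single Redis hash, a consumer can read it back with one hgetall call. A brief hedged sketch; the hash key below is a placeholder standing in for the value get_mitm_redis_key(target_key) would produce.

import redis

redis_conn = redis.Redis(db=1, decode_responses=True)
proxy_info = redis_conn.hgetall('gmitmproxy:placeholder-target-key')  # placeholder key
print(proxy_info.get('endpoint'), proxy_info.get('container_id'))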
Example 20
def start_jupyter(labbook: LabBook,
                  username: str,
                  tag: Optional[str] = None,
                  check_reachable: bool = True,
                  proxy_prefix: Optional[str] = None) -> str:
    """ Main entrypoint to launching Jupyter. Note, the caller must
        determine for themselves the host and port.

    Returns:
        Path to jupyter (e.g., "/lab?token=xyz")
    """
    owner = InventoryManager().query_owner(labbook)
    lb_key = tag or infer_docker_image_name(
        labbook_name=labbook.name, owner=owner, username=username)
    docker_client = get_docker_client()
    lb_container = docker_client.containers.get(lb_key)
    if lb_container.status != 'running':
        raise GigantumException(
            f"{str(labbook)} container is not running. Start it before launch a dev tool."
        )

    jupyter_ps = ps_search(lb_container, 'jupyter lab')

    # Get IP of container on Docker Bridge Network
    lb_ip_addr = get_container_ip(lb_key)

    if len(jupyter_ps) == 1:
        logger.info(f'Found existing Jupyter instance for {str(labbook)}.')

        # Get token from PS in container
        t = re.search(r"token='?([a-zA-Z\d-]+)'?", jupyter_ps[0])
        if not t:
            raise GigantumException('Cannot detect Jupyter Lab token')
        token = t.groups()[0]
        suffix = f'{proxy_prefix or ""}/lab/tree/code?token={token}'

        if check_reachable:
            check_jupyter_reachable(lb_ip_addr, DEFAULT_JUPYTER_PORT,
                                    f'{proxy_prefix or ""}')

        return suffix
    elif len(jupyter_ps) == 0:
        token = str(uuid.uuid4()).replace('-', '')
        if proxy_prefix and proxy_prefix[0] != '/':
            proxy_prefix = f'/{proxy_prefix}'
        _start_jupyter_process(labbook, lb_container, username, lb_key, token,
                               proxy_prefix)
        suffix = f'{proxy_prefix or ""}/lab/tree/code?token={token}'
        if check_reachable:
            check_jupyter_reachable(lb_ip_addr, DEFAULT_JUPYTER_PORT,
                                    f'{proxy_prefix or ""}')
        return suffix
    else:
        # If "ps aux" for jupyterlab returns multiple hits - this should never happen.
        for n, l in enumerate(jupyter_ps):
            logger.error(
                f'Multiple JupyterLab instances - ({n+1} of {len(jupyter_ps)}) - {l}'
            )
        raise ValueError(
            f'Multiple Jupyter Lab instances detected in project env. You should restart the container.'
        )
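The token-parsing step in the existing-instance branch can be exercised on its own. A short sketch with a made-up `ps aux` line shows how the regular expression extracts the Jupyter token:

import re

# Made-up process line of the kind ps_search() would return
ps_line = "giguser 211 0.3 1.2 /opt/conda/bin/jupyter-lab --NotebookApp.token='abc123def456'"
match = re.search(r"token='?([a-zA-Z\d-]+)'?", ps_line)
if match:
    print(f"/lab/tree/code?token={match.group(1)}")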