Example #1
0
def fixture_single_dataset():
    """A pytest fixture that creates a temporary working directory, a config file to match, creates the schema,
    and populates the environment component repository.
    Class scope modifier attached
    """
    # Create temp dir
    config_file, temp_dir = _create_temp_work_dir()

    # Create user identity
    insert_cached_identity(temp_dir)

    # Create test client
    schema = graphene.Schema(query=LabbookQuery, mutation=LabbookMutations)

    # Create a bunch of lab books
    im = InventoryManager(config_file)

    ds = im.create_dataset('default',
                           'default',
                           "test-dataset",
                           storage_type="gigantum_object_v1",
                           description="Cats 2")
    m = Manifest(ds, 'default')
    cm_class = get_cache_manager_class(ds.client_config)
    cache_mgr = cm_class(ds, 'default')
    revision = ds.git.repo.head.commit.hexsha

    os.makedirs(os.path.join(cache_mgr.cache_root, revision, "other_dir"))
    helper_append_file(cache_mgr.cache_root, revision, "test1.txt", "asdfasdf")
    helper_append_file(cache_mgr.cache_root, revision, "test2.txt", "rtg")
    helper_append_file(cache_mgr.cache_root, revision, "test3.txt", "wer")
    helper_append_file(cache_mgr.cache_root, revision, "other_dir/test4.txt",
                       "dfasdfhfgjhg")
    helper_append_file(cache_mgr.cache_root, revision, "other_dir/test5.txt",
                       "fdghdfgsa")
    m.update()

    with patch.object(Configuration, 'find_default_config',
                      lambda self: config_file):
        # Load User identity into app context
        app = Flask("lmsrvlabbook")
        app.config["LABMGR_CONFIG"] = Configuration()
        app.config["LABMGR_ID_MGR"] = get_identity_manager(Configuration())

        with app.app_context():
            # within this block, current_app points to app. Set current user explicitly (this is done in the middleware)
            flask.g.user_obj = app.config["LABMGR_ID_MGR"].get_user_profile()

            # Create a test client
            client = Client(schema,
                            middleware=[DataloaderMiddleware()],
                            context_value=ContextMock())

            yield config_file, temp_dir, client, ds, cache_mgr

    # Remove the temp_dir
    shutil.rmtree(temp_dir)
    def test_get_cache_manager_class(self, mock_dataset_with_cache_dir):
        ds = mock_dataset_with_cache_dir[0]
        cm = get_cache_manager_class(ds.client_config)
        hfc = cm(ds, "tester")
        assert isinstance(hfc, HostFilesystemCache)

        assert hfc.cache_root == os.path.join(ds.client_config.config.get('git')['working_directory'], '.labmanager',
                                              'datasets', 'tester', 'tester', ds.name)

        rev = ds.git.repo.head.commit.hexsha
        assert hfc.current_revision_dir == os.path.join(ds.client_config.config.get('git')['working_directory'],
                                                        '.labmanager', 'datasets', 'tester', 'tester', ds.name, rev)
    def test_get_cache_manager_class_no_config(self, mock_dataset_with_cache_dir):
        ds = mock_dataset_with_cache_dir[0]
        config = ds.client_config
        assert 'datasets' in config.config
        del config.config['datasets']
        assert 'datasets' not in config.config

        cm = get_cache_manager_class(config)
        hfc = cm(ds, "tester")
        assert isinstance(hfc, HostFilesystemCache)

        assert hfc.cache_root == os.path.join(ds.client_config.config.get('git')['working_directory'], '.labmanager',
                                              'datasets', 'tester', 'tester', ds.name)
Example #4
0
    def __init__(self,
                 dataset: Dataset,
                 logged_in_username: Optional[str] = None) -> None:
        self.dataset = dataset

        cache_mgr_class = get_cache_manager_class(self.dataset.client_config)
        self.cache_mgr: CacheManager = cache_mgr_class(self.dataset,
                                                       logged_in_username)

        self.hasher = SmartHash(dataset.root_dir, self.cache_mgr.cache_root,
                                self.dataset.git.repo.head.commit.hexsha)

        self.ignore_file = os.path.join(dataset.root_dir, ".gigantumignore")

        self.manifest = self._load_manifest()
Example #5
0
    def __init__(self, dataset: 'Dataset', logged_in_username: Optional[str] = None) -> None:
        self.dataset = dataset
        self.logged_in_username = logged_in_username

        cache_mgr_class = get_cache_manager_class(self.dataset.client_config)
        self.cache_mgr: CacheManager = cache_mgr_class(self.dataset, logged_in_username)

        self.hasher = SmartHash(dataset.root_dir, self.cache_mgr.cache_root,
                                self.dataset.git.repo.head.commit.hexsha)

        self._manifest_io = ManifestFileCache(dataset, logged_in_username)

        # TODO: Support ignoring files
        # self.ignore_file = os.path.join(dataset.root_dir, ".gigantumignore")
        # self.ignored = self._load_ignored()

        self._legacy_manifest_file = os.path.join(self.dataset.root_dir, 'manifest', 'manifest0')
Example #6
0
def start_labbook_container(labbook_root: str,
                            config_path: str,
                            username: str,
                            override_image_id: Optional[str] = None) -> str:
    """ Start a Docker container from a given image_name.

    Args:
        labbook_root: Root dir of labbook
        config_path: Path to LabBook configuration file.
        override_image_id: Optional explicit docker image id (do not infer).
        username: Username of active user. Do not use with override_image_id.

    Returns:
        Tuple containing docker container id, dict mapping of exposed ports.

    Raises:
    """
    if username and override_image_id:
        raise ValueError(
            'Argument username and override_image_id cannot both be set')

    lb = InventoryManager(
        config_file=config_path).load_labbook_from_directory(labbook_root)
    if not override_image_id:
        owner = InventoryManager().query_owner(lb)
        tag = infer_docker_image_name(lb.name, owner, username)
    else:
        tag = override_image_id

    mnt_point = labbook_root.replace('/mnt/gigantum',
                                     os.environ['HOST_WORK_DIR'])
    volumes_dict = {
        mnt_point: {
            'bind': '/mnt/labbook',
            'mode': 'cached'
        },
        'labmanager_share_vol': {
            'bind': '/mnt/share',
            'mode': 'rw'
        }
    }

    # Set up additional bind mounts for datasets if needed.
    submodules = lb.git.list_submodules()
    for submodule in submodules:
        try:
            namespace, dataset_name = submodule['name'].split("&")
            submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         namespace, dataset_name)
            ds = InventoryManager().load_dataset_from_directory(submodule_dir)
            ds.namespace = namespace

            cm_class = get_cache_manager_class(ds.client_config)
            cm = cm_class(ds, username)
            ds_cache_dir = cm.current_revision_dir.replace(
                '/mnt/gigantum', os.environ['HOST_WORK_DIR'])
            volumes_dict[ds_cache_dir] = {
                'bind': f'/mnt/labbook/input/{ds.name}',
                'mode': 'ro'
            }
        except InventoryException:
            continue

    # If re-mapping permissions, be sure to configure the container
    if 'LOCAL_USER_ID' in os.environ:
        env_var = [f"LOCAL_USER_ID={os.environ['LOCAL_USER_ID']}"]
    else:
        env_var = ["WINDOWS_HOST=1"]

    # Get resource limits
    resource_args = dict()
    memory_limit = lb.client_config.config['container']['memory']
    cpu_limit = lb.client_config.config['container']['cpu']
    gpu_shared_mem = lb.client_config.config['container']['gpu_shared_mem']
    if memory_limit:
        # If memory_limit not None, pass to Docker to limit memory allocation to container
        resource_args["mem_limit"] = memory_limit
    if cpu_limit:
        # If cpu_limit not None, pass to Docker to limit CPU allocation to container
        # "nano_cpus" is an integer in factional parts of a CPU
        resource_args["nano_cpus"] = round(cpu_limit * 1e9)

    docker_client = get_docker_client()

    # run with nvidia-docker if we have GPU support on the Host compatible with the project
    should_run_nvidia, reason = should_launch_with_cuda_support(
        lb.cuda_version)
    if should_run_nvidia:
        logger.info(f"Launching container with GPU support:{reason}")
        if gpu_shared_mem:
            resource_args["shm_size"] = gpu_shared_mem

        container_id = docker_client.containers.run(tag,
                                                    detach=True,
                                                    init=True,
                                                    name=tag,
                                                    environment=env_var,
                                                    volumes=volumes_dict,
                                                    runtime='nvidia',
                                                    **resource_args).id
    else:
        logger.info(f"Launching container without GPU support. {reason}")
        container_id = docker_client.containers.run(tag,
                                                    detach=True,
                                                    init=True,
                                                    name=tag,
                                                    environment=env_var,
                                                    volumes=volumes_dict,
                                                    **resource_args).id

    labmanager_ip = ""
    try:
        labmanager_ip = get_labmanager_ip() or ""
    except IndexError:
        logger.warning("Cannot find labmanager IP")

    labmanager_ip = labmanager_ip.strip()
    cmd = f"echo {labmanager_ip} > /home/giguser/labmanager_ip"
    for timeout in range(20):
        time.sleep(0.5)
        if docker_client.containers.get(container_id).status == 'running':
            r = docker_client.containers.get(container_id).exec_run(
                f'sh -c "{cmd}"')
            logger.info(f"Response to write labmanager_ip in {tag}: {r}")
            break
    else:
        logger.error(
            "After 10 seconds could not write IP to labmanager container."
            f" Container status = {docker_client.containers.get(container_id).status}"
        )
    return container_id