def fixture_single_dataset():
    """A pytest fixture that creates a temporary working directory, a config file to match,
    creates the schema, and populates the environment component repository.

    Class scope modifier attached
    """
    # Isolated working directory plus a config file pointing at it
    config_file, temp_dir = _create_temp_work_dir()

    # Cache a user identity inside the temp working directory
    insert_cached_identity(temp_dir)

    # GraphQL schema exercised by the test client
    schema = graphene.Schema(query=LabbookQuery, mutation=LabbookMutations)

    # Create a dataset to query against
    inv_mgr = InventoryManager(config_file)
    ds = inv_mgr.create_dataset('default', 'default', "test-dataset",
                                storage_type="gigantum_object_v1",
                                description="Cats 2")
    m = Manifest(ds, 'default')

    cm_class = get_cache_manager_class(ds.client_config)
    cache_mgr = cm_class(ds, 'default')
    revision = ds.git.repo.head.commit.hexsha

    # Seed the file cache with a subdirectory and a handful of files
    os.makedirs(os.path.join(cache_mgr.cache_root, revision, "other_dir"))
    seed_files = [("test1.txt", "asdfasdf"),
                  ("test2.txt", "rtg"),
                  ("test3.txt", "wer"),
                  ("other_dir/test4.txt", "dfasdfhfgjhg"),
                  ("other_dir/test5.txt", "fdghdfgsa")]
    for rel_path, contents in seed_files:
        helper_append_file(cache_mgr.cache_root, revision, rel_path, contents)
    m.update()

    with patch.object(Configuration, 'find_default_config', lambda self: config_file):
        # Load User identity into app context
        app = Flask("lmsrvlabbook")
        app.config["LABMGR_CONFIG"] = Configuration()
        app.config["LABMGR_ID_MGR"] = get_identity_manager(Configuration())

        with app.app_context():
            # within this block, current_app points to app. Set current user
            # explicitly (this is done in the middleware)
            flask.g.user_obj = app.config["LABMGR_ID_MGR"].get_user_profile()

            # Create a test client
            client = Client(schema,
                            middleware=[DataloaderMiddleware()],
                            context_value=ContextMock())

            yield config_file, temp_dir, client, ds, cache_mgr

            # Remove the temp_dir
            shutil.rmtree(temp_dir)
def test_get_cache_manager_class(self, mock_dataset_with_cache_dir):
    """The default cache manager should be HostFilesystemCache rooted under the git working dir."""
    ds = mock_dataset_with_cache_dir[0]
    manager_cls = get_cache_manager_class(ds.client_config)
    cache = manager_cls(ds, "tester")
    assert isinstance(cache, HostFilesystemCache)

    working_dir = ds.client_config.config.get('git')['working_directory']
    expected_root = os.path.join(working_dir, '.labmanager', 'datasets',
                                 'tester', 'tester', ds.name)
    assert cache.cache_root == expected_root

    # The per-revision dir is the cache root plus the current git commit hash
    rev = ds.git.repo.head.commit.hexsha
    assert cache.current_revision_dir == os.path.join(expected_root, rev)
def test_get_cache_manager_class_no_config(self, mock_dataset_with_cache_dir):
    """With no 'datasets' section in the config, the manager falls back to HostFilesystemCache."""
    ds = mock_dataset_with_cache_dir[0]
    config = ds.client_config

    # Drop the datasets section to exercise the fallback path
    assert 'datasets' in config.config
    del config.config['datasets']
    assert 'datasets' not in config.config

    manager_cls = get_cache_manager_class(config)
    cache = manager_cls(ds, "tester")
    assert isinstance(cache, HostFilesystemCache)

    working_dir = ds.client_config.config.get('git')['working_directory']
    assert cache.cache_root == os.path.join(working_dir, '.labmanager', 'datasets',
                                            'tester', 'tester', ds.name)
def __init__(self, dataset: Dataset, logged_in_username: Optional[str] = None) -> None:
    """Bind this instance to a dataset and set up its cache manager, hasher, and manifest."""
    self.dataset = dataset

    # Resolve the configured cache manager implementation and instantiate it
    manager_cls = get_cache_manager_class(self.dataset.client_config)
    self.cache_mgr: CacheManager = manager_cls(self.dataset, logged_in_username)

    # Hash files relative to the current git revision's cache directory
    current_revision = self.dataset.git.repo.head.commit.hexsha
    self.hasher = SmartHash(dataset.root_dir, self.cache_mgr.cache_root, current_revision)

    self.ignore_file = os.path.join(dataset.root_dir, ".gigantumignore")
    self.manifest = self._load_manifest()
def __init__(self, dataset: 'Dataset', logged_in_username: Optional[str] = None) -> None:
    """Bind this instance to a dataset and set up its cache manager, hasher, and manifest IO."""
    self.dataset = dataset
    self.logged_in_username = logged_in_username

    # Resolve the configured cache manager implementation and instantiate it
    manager_cls = get_cache_manager_class(self.dataset.client_config)
    self.cache_mgr: CacheManager = manager_cls(self.dataset, logged_in_username)

    # Hash files relative to the current git revision's cache directory
    current_revision = self.dataset.git.repo.head.commit.hexsha
    self.hasher = SmartHash(dataset.root_dir, self.cache_mgr.cache_root, current_revision)

    self._manifest_io = ManifestFileCache(dataset, logged_in_username)

    # TODO: Support ignoring files
    # self.ignore_file = os.path.join(dataset.root_dir, ".gigantumignore")
    # self.ignored = self._load_ignored()

    self._legacy_manifest_file = os.path.join(self.dataset.root_dir, 'manifest', 'manifest0')
def start_labbook_container(labbook_root: str, config_path: str, username: str,
                            override_image_id: Optional[str] = None) -> str:
    """ Start a Docker container from a given image_name.

    Args:
        labbook_root: Root dir of labbook
        config_path: Path to LabBook configuration file.
        override_image_id: Optional explicit docker image id (do not infer).
        username: Username of active user. Do not use with override_image_id.

    Returns:
        The id of the launched docker container.

    Raises:
        ValueError: If both username and override_image_id are set.
    """
    if username and override_image_id:
        raise ValueError(
            'Argument username and override_image_id cannot both be set')

    lb = InventoryManager(
        config_file=config_path).load_labbook_from_directory(labbook_root)
    if not override_image_id:
        owner = InventoryManager().query_owner(lb)
        tag = infer_docker_image_name(lb.name, owner, username)
    else:
        tag = override_image_id

    # Translate the in-container path to the host path Docker needs for binds
    mnt_point = labbook_root.replace('/mnt/gigantum', os.environ['HOST_WORK_DIR'])
    volumes_dict = {
        mnt_point: {
            'bind': '/mnt/labbook',
            'mode': 'cached'
        },
        'labmanager_share_vol': {
            'bind': '/mnt/share',
            'mode': 'rw'
        }
    }

    # Set up additional bind mounts for datasets if needed.
    submodules = lb.git.list_submodules()
    for submodule in submodules:
        try:
            # Submodule names encode "<namespace>&<dataset_name>"
            namespace, dataset_name = submodule['name'].split("&")
            submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         namespace, dataset_name)
            ds = InventoryManager().load_dataset_from_directory(submodule_dir)
            ds.namespace = namespace

            cm_class = get_cache_manager_class(ds.client_config)
            cm = cm_class(ds, username)
            ds_cache_dir = cm.current_revision_dir.replace(
                '/mnt/gigantum', os.environ['HOST_WORK_DIR'])
            volumes_dict[ds_cache_dir] = {
                'bind': f'/mnt/labbook/input/{ds.name}',
                'mode': 'ro'
            }
        except InventoryException:
            # Best-effort: skip datasets that cannot be loaded from disk
            continue

    # If re-mapping permissions, be sure to configure the container
    if 'LOCAL_USER_ID' in os.environ:
        env_var = [f"LOCAL_USER_ID={os.environ['LOCAL_USER_ID']}"]
    else:
        env_var = ["WINDOWS_HOST=1"]

    # Get resource limits
    resource_args = dict()
    memory_limit = lb.client_config.config['container']['memory']
    cpu_limit = lb.client_config.config['container']['cpu']
    gpu_shared_mem = lb.client_config.config['container']['gpu_shared_mem']
    if memory_limit:
        # If memory_limit not None, pass to Docker to limit memory allocation to container
        resource_args["mem_limit"] = memory_limit
    if cpu_limit:
        # If cpu_limit not None, pass to Docker to limit CPU allocation to container
        # "nano_cpus" is an integer in factional parts of a CPU
        resource_args["nano_cpus"] = round(cpu_limit * 1e9)

    docker_client = get_docker_client()

    # run with nvidia-docker if we have GPU support on the Host compatible with the project
    should_run_nvidia, reason = should_launch_with_cuda_support(lb.cuda_version)
    run_kwargs = dict(detach=True, init=True, name=tag,
                      environment=env_var, volumes=volumes_dict)
    if should_run_nvidia:
        logger.info(f"Launching container with GPU support:{reason}")
        if gpu_shared_mem:
            resource_args["shm_size"] = gpu_shared_mem
        run_kwargs["runtime"] = 'nvidia'
    else:
        logger.info(f"Launching container without GPU support. {reason}")

    container_id = docker_client.containers.run(tag, **run_kwargs,
                                                **resource_args).id

    labmanager_ip = ""
    try:
        labmanager_ip = get_labmanager_ip() or ""
    except IndexError:
        logger.warning("Cannot find labmanager IP")

    labmanager_ip = labmanager_ip.strip()
    cmd = f"echo {labmanager_ip} > /home/giguser/labmanager_ip"
    # Wait up to 10 seconds (20 polls x 0.5s) for the container to come up,
    # then write the labmanager IP into it.
    for timeout in range(20):
        time.sleep(0.5)
        if docker_client.containers.get(container_id).status == 'running':
            r = docker_client.containers.get(container_id).exec_run(
                f'sh -c "{cmd}"')
            logger.info(f"Response to write labmanager_ip in {tag}: {r}")
            break
    else:
        logger.error(
            "After 10 seconds could not write IP to labmanager container."
            f" Container status = {docker_client.containers.get(container_id).status}"
        )
    return container_id