def test_unflatten(self): flattened = {"a.b.c": 1, "a.b.d": 0, "a.e.f.g.h": 2, "b": 3} unflattened = unflatten(flattened) assert unflattened == {"a": {"b": {"c": 1, "d": 0}, "e": {"f": {"g": {"h": 2}}}}, "b": 3} # should do nothing to a non-flat dictionary assert unflatten(unflattened) == unflattened
def test_unflatten(self): flattened = {"a.b.c": 1, "a.b.d": 0, "a.e.f.g.h": 2, "b": 3} unflattened = unflatten(flattened) assert unflattened == { "a": { "b": { "c": 1, "d": 0 }, "e": { "f": { "g": { "h": 2 } } } }, "b": 3 } # should do nothing to a non-flat dictionary assert unflatten(unflattened) == unflattened
def load_archive_from_folder(archive_file: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None) -> Archive: # redirect to the cache, if necessary resolved_archive_file = cached_path(archive_file) logger.info(f"loading model from direactory {archive_file}") serialization_dir = resolved_archive_file # Check for supplemental files in archive fta_filename = os.path.join(serialization_dir, _FTA_NAME) if os.path.exists(fta_filename): with open(fta_filename, 'r') as fta_file: files_to_archive = json.loads(fta_file.read()) # Add these replacements to overrides replacements_dict: Dict[str, Any] = {} for key, filename in files_to_archive.items(): if not filename.startswith("/"): filename = os.path.join(serialization_dir, f"fta/{key}") replacements_dict[key] = filename overrides_dict = parse_overrides(overrides) combined_dict = with_fallback(preferred=unflatten(replacements_dict), fallback=overrides_dict) overrides = json.dumps(combined_dict) # Load config config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides) config.loading_from_archive = True if weights_file: weights_path = weights_file else: weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME) # Instantiate model. Use a duplicate of the config, as it will get consumed. model = Model.load(config.duplicate(), weights_file=weights_path, serialization_dir=serialization_dir, cuda_device=cuda_device) return Archive(model=model, config=config)
def _load_archive(archive_file: str, adapters_dir: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None): """ Instantiates an Archive from an archived `tar.gz` file. Parameters ---------- archive_file: ``str`` The archive file to load the model from. weights_file: ``str``, optional (default = None) The weights file to use. If unspecified, weights.th in the archive_file will be used. cuda_device: ``int``, optional (default = -1) If `cuda_device` is >= 0, the model will be loaded onto the corresponding GPU. Otherwise it will be loaded onto the CPU. overrides: ``str``, optional (default = "") JSON overrides to apply to the unarchived ``Params`` object. """ # redirect to the cache, if necessary resolved_archive_file = cached_path(archive_file) if resolved_archive_file == archive_file: logger.info(f"loading archive file {archive_file}") else: logger.info(f"loading archive file {archive_file} from cache at {resolved_archive_file}") if os.path.isdir(resolved_archive_file): serialization_dir = resolved_archive_file else: # Extract archive to temp dir tempdir = tempfile.mkdtemp() logger.info(f"extracting archive file {resolved_archive_file} to temp dir {tempdir}") with tarfile.open(resolved_archive_file, 'r:gz') as archive: archive.extractall(tempdir) # Postpone cleanup until exit in case the unarchived contents are needed outside # this function. atexit.register(_cleanup_archive_dir, tempdir) serialization_dir = tempdir # Check for supplemental files in archive fta_filename = os.path.join(serialization_dir, "files_to_archive.json") if os.path.exists(fta_filename): with open(fta_filename, 'r') as fta_file: files_to_archive = json.loads(fta_file.read()) # Add these replacements to overrides replacements_dict: Dict[str, Any] = {} for key, original_filename in files_to_archive.items(): replacement_filename = os.path.join(serialization_dir, f"fta/{key}") if os.path.exists(replacement_filename): replacements_dict[key] = replacement_filename else: logger.warning(f"Archived file {replacement_filename} not found! At train time " f"this file was located at {original_filename}. This may be " "because you are loading a serialization directory. Attempting to " "load the file from its train-time location.") overrides_dict = parse_overrides(overrides) combined_dict = with_fallback(preferred=overrides_dict, fallback=unflatten(replacements_dict)) overrides = json.dumps(combined_dict) # Load config config = Params.from_file(os.path.join(serialization_dir, "config.json"), overrides) config.loading_from_archive = True if weights_file: weights_path = weights_file else: weights_path = os.path.join(serialization_dir, "weights.th") # Fallback for serialization directories. if not os.path.exists(weights_path): weights_path = os.path.join(serialization_dir, "best.th") # Instantiate model. Use a duplicate of the config, as it will get consumed. model = _load(config.duplicate(), adapters_dir=adapters_dir, weights_file=weights_path, serialization_dir=serialization_dir, cuda_device=cuda_device) return Archive(model=model, config=config)
def load_archive(archive_file: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None) -> Archive: """ Instantiates an Archive from an archived `tar.gz` file. Parameters ---------- archive_file: ``str`` The archive file to load the model from. weights_file: ``str``, optional (default = None) The weights file to use. If unspecified, weights.th in the archive_file will be used. cuda_device: ``int``, optional (default = -1) If `cuda_device` is >= 0, the model will be loaded onto the corresponding GPU. Otherwise it will be loaded onto the CPU. overrides: ``str``, optional (default = "") JSON overrides to apply to the unarchived ``Params`` object. """ # redirect to the cache, if necessary resolved_archive_file = cached_path(archive_file) if resolved_archive_file == archive_file: logger.info(f"loading archive file {archive_file}") else: logger.info( f"loading archive file {archive_file} from cache at {resolved_archive_file}" ) tempdir = None if os.path.isdir(resolved_archive_file): serialization_dir = resolved_archive_file else: # Extract archive to temp dir tempdir = tempfile.mkdtemp() logger.info( f"extracting archive file {resolved_archive_file} to temp dir {tempdir}" ) with tarfile.open(resolved_archive_file, 'r:gz') as archive: archive.extractall(tempdir) serialization_dir = tempdir # Check for supplemental files in archive fta_filename = os.path.join(serialization_dir, _FTA_NAME) if os.path.exists(fta_filename): with open(fta_filename, 'r') as fta_file: files_to_archive = json.loads(fta_file.read()) # Add these replacements to overrides replacements_dict: Dict[str, Any] = {} for key, _ in files_to_archive.items(): replacement_filename = os.path.join(serialization_dir, f"fta/{key}") replacements_dict[key] = replacement_filename overrides_dict = parse_overrides(overrides) combined_dict = with_fallback(preferred=unflatten(replacements_dict), fallback=overrides_dict) overrides = json.dumps(combined_dict) # Load config config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides) config.loading_from_archive = True if weights_file: weights_path = weights_file else: weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME) # Instantiate model. Use a duplicate of the config, as it will get consumed. model = Model.load(config.duplicate(), weights_file=weights_path, serialization_dir=serialization_dir, cuda_device=cuda_device) if tempdir: # Clean up temp dir shutil.rmtree(tempdir) return Archive(model=model, config=config)
def load_archive(archive_file: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None) -> Archive: """ Instantiates an Archive from an archived `tar.gz` file. Parameters ---------- archive_file: ``str`` The archive file to load the model from. weights_file: ``str``, optional (default = None) The weights file to use. If unspecified, weights.th in the archive_file will be used. cuda_device: ``int``, optional (default = -1) If `cuda_device` is >= 0, the model will be loaded onto the corresponding GPU. Otherwise it will be loaded onto the CPU. overrides: ``str``, optional (default = "") JSON overrides to apply to the unarchived ``Params`` object. """ # redirect to the cache, if necessary resolved_archive_file = cached_path(archive_file) if resolved_archive_file == archive_file: logger.info(f"loading archive file {archive_file}") else: logger.info(f"loading archive file {archive_file} from cache at {resolved_archive_file}") if os.path.isdir(resolved_archive_file): serialization_dir = resolved_archive_file else: # Extract archive to temp dir tempdir = tempfile.mkdtemp() logger.info(f"extracting archive file {resolved_archive_file} to temp dir {tempdir}") with tarfile.open(resolved_archive_file, 'r:gz') as archive: archive.extractall(tempdir) # Postpone cleanup until exit in case the unarchived contents are needed outside # this function. atexit.register(_cleanup_archive_dir, tempdir) serialization_dir = tempdir # Check for supplemental files in archive fta_filename = os.path.join(serialization_dir, _FTA_NAME) if os.path.exists(fta_filename): with open(fta_filename, 'r') as fta_file: files_to_archive = json.loads(fta_file.read()) # Add these replacements to overrides replacements_dict: Dict[str, Any] = {} for key, _ in files_to_archive.items(): replacement_filename = os.path.join(serialization_dir, f"fta/{key}") replacements_dict[key] = replacement_filename overrides_dict = parse_overrides(overrides) combined_dict = with_fallback(preferred=unflatten(replacements_dict), fallback=overrides_dict) overrides = json.dumps(combined_dict) # Load config config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides) config.loading_from_archive = True if weights_file: weights_path = weights_file else: weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME) # Instantiate model. Use a duplicate of the config, as it will get consumed. model = Model.load(config.duplicate(), weights_file=weights_path, serialization_dir=serialization_dir, cuda_device=cuda_device) return Archive(model=model, config=config)