def load_metadata(file: Union[str, bytes, int, PathLike]) -> Metadata:
    """
    Read the Metadata chunk of a finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a metadata chunk.

    Returns
    -------
    metadata : Metadata
        The Metadata read from the file.

    Raises
    ------
    ValueError
        If no Metadata chunk was found in the file, or if the chunk search
        reported an identifier other than ``ChunkIdentifier.Metadata``.
    """
    with open(file, 'rb') as inf:
        found = find_chunk(inf, [ChunkIdentifier.Metadata])
        if found is None:
            raise ValueError("File did not contain a Metadata chunk")
        # Reject anything that is not the requested chunk before reading.
        if found != ChunkIdentifier.Metadata:
            raise ValueError(f"unexpected chunk: {str(found)}")
        return Metadata.read_chunk(inf)
def load_norms(file: Union[str, bytes, int, PathLike]) -> Norms:
    """
    Read the Norms chunk of a finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a norms chunk.

    Returns
    -------
    norms : Norms
        The Norms read from the file.

    Raises
    ------
    ValueError
        If no Norms chunk was found in the file, or if the chunk search
        reported an identifier other than ``ChunkIdentifier.NdNorms``.
    """
    with open(file, "rb") as inf:
        found = find_chunk(inf, [ChunkIdentifier.NdNorms])
        if found is None:
            raise ValueError("File did not contain a Norms chunk")
        # Reject anything that is not the requested chunk before reading.
        if found != ChunkIdentifier.NdNorms:
            raise ValueError(f"unexpected chunk: {str(found)}")
        return Norms.read_chunk(inf)
def load_storage(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> Storage:
    """
    Load the first known storage from a finalfusion file.

    Both ``NdArray`` and ``QuantizedArray`` chunks are accepted; the chunk
    identifier that is found decides which storage class loads the data.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a finalfusion storage chunk.
    mmap : bool
        Toggles memory mapping the storage buffer as read-only.

    Returns
    -------
    storage : Union[ffp.storage.NdArray, ffp.storage.QuantizedArray]
        First storage in the file.

    Raises
    ------
    ValueError
        If the file did not contain a storage, or if the chunk search
        reported an identifier that is not one of the storage identifiers.
    """
    with open(file, "rb") as inf:
        storage_ids = [ChunkIdentifier.NdArray, ChunkIdentifier.QuantizedArray]
        found = find_chunk(inf, storage_ids)
        if found is None:
            raise ValueError('File did not contain a storage')
        if found == ChunkIdentifier.NdArray:
            storage_cls = NdArray
        elif found == ChunkIdentifier.QuantizedArray:
            storage_cls = QuantizedArray
        else:
            raise ValueError('Unexpected storage chunk.')
        return storage_cls.load(inf, mmap)
def load_ndarray(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> NdArray:
    """
    Read the NdArray chunk of a finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a ndarray chunk.
    mmap : bool
        Toggles memory mapping the array buffer as read only. When set, the
        chunk is handed to ``NdArray.mmap_storage`` instead of
        ``NdArray.read_chunk``.

    Returns
    -------
    storage : NdArray
        The NdArray storage from the file.

    Raises
    ------
    ValueError
        If no NdArray chunk was found in the file, or if the chunk search
        reported an identifier other than ``ChunkIdentifier.NdArray``.
    """
    with open(file, "rb") as inf:
        found = find_chunk(inf, [ChunkIdentifier.NdArray])
        if found is None:
            raise ValueError("File did not contain a NdArray chunk")
        if found != ChunkIdentifier.NdArray:
            raise ValueError(f"unknown storage type: {found}")
        # Pick the reader only after the chunk has been validated.
        reader = NdArray.mmap_storage if mmap else NdArray.read_chunk
        return reader(inf)
def load_simple_vocab(file: Union[str, bytes, int, PathLike]) -> SimpleVocab:
    """
    Load a SimpleVocab from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a SimpleVocab chunk.

    Returns
    -------
    vocab : SimpleVocab
        Returns the first SimpleVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain a SimpleVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.SimpleVocab])
        if chunk is None:
            # The message previously ended in a stray '}' left over from an
            # f-string placeholder; it is removed here.
            raise ValueError('File did not contain a SimpleVocab')
        return SimpleVocab.read_chunk(inf)
def load_fasttext_vocab(
        file: Union[str, bytes, int, PathLike]) -> FastTextVocab:
    """
    Load a FastTextVocab from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a FastTextVocab chunk.

    Returns
    -------
    vocab : FastTextVocab
        Returns the first FastTextVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain a FastTextVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.FastTextSubwordVocab])
        if chunk is None:
            # The message previously ended in a stray '}' left over from an
            # f-string placeholder; it is removed here.
            raise ValueError('File did not contain a FastTextVocab')
        return FastTextVocab.read_chunk(inf)
def load_vocab(file: Union[str, bytes, int, PathLike]) -> Vocab:
    """
    Load the first known vocabulary from a finalfusion file.

    Simple, FastText subword, explicit subword and bucket subword vocab
    chunks are accepted; the chunk identifier that is found decides which
    vocabulary class reads the data.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a finalfusion vocab chunk.

    Returns
    -------
    vocab : SimpleVocab, FastTextVocab, FinalfusionBucketVocab, ExplicitVocab
        First Vocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain a vocabulary, or if the chunk search
        reported an identifier that is not one of the vocab identifiers.
    """
    with open(file, "rb") as inf:
        vocab_ids = [
            ChunkIdentifier.SimpleVocab,
            ChunkIdentifier.FastTextSubwordVocab,
            ChunkIdentifier.ExplicitSubwordVocab,
            ChunkIdentifier.BucketSubwordVocab,
        ]
        found = find_chunk(inf, vocab_ids)
        if found is None:
            raise ValueError('File did not contain a vocabulary')
        # Map the identifier to the class that knows how to read it.
        if found == ChunkIdentifier.SimpleVocab:
            vocab_cls = SimpleVocab
        elif found == ChunkIdentifier.BucketSubwordVocab:
            vocab_cls = FinalfusionBucketVocab
        elif found == ChunkIdentifier.ExplicitSubwordVocab:
            vocab_cls = ExplicitVocab
        elif found == ChunkIdentifier.FastTextSubwordVocab:
            vocab_cls = FastTextVocab
        else:
            raise ValueError('Unexpected vocabulary chunk.')
        return vocab_cls.read_chunk(inf)