Esempio n. 1
0
def load_metadata(file: Union[str, bytes, int, PathLike]) -> Metadata:
    """
    Read the Metadata chunk of a finalfusion file.

    The file is opened in binary mode, ``find_chunk`` is asked to locate a
    Metadata chunk, and the chunk is then parsed with
    ``Metadata.read_chunk``.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a metadata chunk.

    Returns
    -------
    metadata : Metadata
        The Metadata read from the file.

    Raises
    ------
    ValueError
        If no Metadata chunk was found, or if ``find_chunk`` reported a
        chunk other than Metadata.
    """
    with open(file, 'rb') as stream:
        found = find_chunk(stream, [ChunkIdentifier.Metadata])
        # Guard clauses first: reject a missing or mismatching chunk.
        if found is None:
            raise ValueError("File did not contain a Metadata chunk")
        if found != ChunkIdentifier.Metadata:
            raise ValueError(f"unexpected chunk: {str(found)}")
        return Metadata.read_chunk(stream)
Esempio n. 2
0
def load_norms(file: Union[str, bytes, int, PathLike]) -> Norms:
    """
    Read the Norms chunk of a finalfusion file.

    The file is opened in binary mode, ``find_chunk`` is asked to locate an
    ``NdNorms`` chunk, and the chunk is then parsed with
    ``Norms.read_chunk``.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a norms chunk.

    Returns
    -------
    norms : Norms
        The Norms read from the file.

    Raises
    ------
    ValueError
        If no Norms chunk was found, or if ``find_chunk`` reported a chunk
        other than ``NdNorms``.
    """
    with open(file, "rb") as stream:
        found = find_chunk(stream, [ChunkIdentifier.NdNorms])
        # Guard clauses first: reject a missing or mismatching chunk.
        if found is None:
            raise ValueError("File did not contain a Norms chunk")
        if found != ChunkIdentifier.NdNorms:
            raise ValueError(f"unexpected chunk: {str(found)}")
        return Norms.read_chunk(stream)
Esempio n. 3
0
def load_storage(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> Storage:
    """
    Load whichever storage chunk a finalfusion file contains.

    ``find_chunk`` is asked for either an ``NdArray`` or a
    ``QuantizedArray`` chunk; the matching storage class then loads it.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a finalfusion storage chunk.
    mmap : bool
        Toggles memory mapping the storage buffer as read-only. Passed
        through unchanged to the storage class' ``load``.

    Returns
    -------
    storage : NdArray or QuantizedArray
        The storage found in the file.

    Raises
    ------
    ValueError
         If the file did not contain a storage, or if ``find_chunk``
         reported a chunk that is not a known storage.
    """
    with open(file, "rb") as stream:
        wanted = [ChunkIdentifier.NdArray, ChunkIdentifier.QuantizedArray]
        found = find_chunk(stream, wanted)
        if found is None:
            raise ValueError('File did not contain a storage')
        # Select the loader for the reported chunk, then invoke it once.
        if found == ChunkIdentifier.NdArray:
            loader = NdArray.load
        elif found == ChunkIdentifier.QuantizedArray:
            loader = QuantizedArray.load
        else:
            raise ValueError('Unexpected storage chunk.')
        return loader(stream, mmap)
Esempio n. 4
0
def load_ndarray(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> NdArray:
    """
    Read the NdArray chunk of a finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a ndarray chunk.
    mmap : bool
        Toggles memory mapping the array buffer as read only. When set,
        the chunk is obtained through ``NdArray.mmap_storage`` instead of
        ``NdArray.read_chunk``.

    Returns
    -------
    storage : NdArray
        The NdArray storage from the file.

    Raises
    ------
    ValueError
        If no NdArray chunk was found, or if ``find_chunk`` reported a
        chunk other than NdArray.
    """
    with open(file, "rb") as stream:
        found = find_chunk(stream, [ChunkIdentifier.NdArray])
        if found is None:
            raise ValueError("File did not contain a NdArray chunk")
        if found != ChunkIdentifier.NdArray:
            raise ValueError(f"unknown storage type: {found}")
        # Memory-mapped and in-memory reads share the same call shape.
        reader = NdArray.mmap_storage if mmap else NdArray.read_chunk
        return reader(stream)
Esempio n. 5
0
def load_simple_vocab(file: Union[str, bytes, int, PathLike]) -> SimpleVocab:
    """
    Load a SimpleVocab from the given finalfusion file.

    The file is opened in binary mode, ``find_chunk`` is asked to locate a
    SimpleVocab chunk, and the chunk is then parsed with
    ``SimpleVocab.read_chunk``.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a SimpleVocab chunk.

    Returns
    -------
    vocab : SimpleVocab
        Returns the first SimpleVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain a SimpleVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.SimpleVocab])
        if chunk is None:
            # The message previously ended in a stray '}' left over from a
            # format string.
            raise ValueError('File did not contain a SimpleVocab')
        return SimpleVocab.read_chunk(inf)
Esempio n. 6
0
def load_fasttext_vocab(
        file: Union[str, bytes, int, PathLike]) -> FastTextVocab:
    """
    Load a FastTextVocab from the given finalfusion file.

    The file is opened in binary mode, ``find_chunk`` is asked to locate a
    ``FastTextSubwordVocab`` chunk, and the chunk is then parsed with
    ``FastTextVocab.read_chunk``.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a FastTextVocab chunk.

    Returns
    -------
    vocab : FastTextVocab
        Returns the first FastTextVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain a FastTextVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.FastTextSubwordVocab])
        if chunk is None:
            # The message previously ended in a stray '}' left over from a
            # format string.
            raise ValueError('File did not contain a FastTextVocab')
        return FastTextVocab.read_chunk(inf)
Esempio n. 7
0
def load_vocab(file: Union[str, bytes, int, PathLike]) -> Vocab:
    """
    Load whichever vocabulary chunk a finalfusion file contains.

    ``find_chunk`` is asked for any of the known vocabulary chunk types;
    the matching vocabulary class then reads the chunk.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a finalfusion vocab chunk.

    Returns
    -------
    vocab : SimpleVocab, FastTextVocab, FinalfusionBucketVocab, ExplicitVocab
        The vocabulary found in the file.

    Raises
    ------
    ValueError
         If the file did not contain a vocabulary, or if ``find_chunk``
         reported a chunk that is not a known vocabulary.
    """
    with open(file, "rb") as stream:
        wanted = [
            ChunkIdentifier.SimpleVocab,
            ChunkIdentifier.FastTextSubwordVocab,
            ChunkIdentifier.ExplicitSubwordVocab,
            ChunkIdentifier.BucketSubwordVocab,
        ]
        found = find_chunk(stream, wanted)
        if found is None:
            raise ValueError('File did not contain a vocabulary')
        # Select the reader for the reported chunk, then invoke it once.
        if found == ChunkIdentifier.SimpleVocab:
            reader = SimpleVocab.read_chunk
        elif found == ChunkIdentifier.BucketSubwordVocab:
            reader = FinalfusionBucketVocab.read_chunk
        elif found == ChunkIdentifier.ExplicitSubwordVocab:
            reader = ExplicitVocab.read_chunk
        elif found == ChunkIdentifier.FastTextSubwordVocab:
            reader = FastTextVocab.read_chunk
        else:
            raise ValueError('Unexpected vocabulary chunk.')
        return reader(stream)