def test_slicing():
    """Slicing a Norms must mirror slicing its backing ndarray, and a
    zero step must raise ValueError just as it does for ndarrays."""
    data = np.float32(np.random.random_sample(10))
    wrapped = Norms(data)
    assert np.allclose(data[:], wrapped[:])
    assert np.allclose(data, wrapped)
    bound = len(data) * 3
    for _ in range(250):
        stop = np.random.randint(-bound, bound)
        start = np.random.randint(-bound, bound)
        stride = np.random.randint(-bound, bound)
        # A zero step is invalid for both sides; otherwise nothing raises.
        guard = contextlib.suppress() if stride != 0 else pytest.raises(
            ValueError)
        assert np.allclose(data[:stop], wrapped[:stop])
        assert np.allclose(data[start:stop], wrapped[start:stop])
        with guard:
            sliced = wrapped[start:stop:stride]
        with guard:
            assert np.allclose(data[start:stop:stride], sliced)
        with guard:
            sliced = wrapped[:stop:stride]
        with guard:
            assert np.allclose(data[:stop:stride], sliced)
        with guard:
            sliced = wrapped[::stride]
        with guard:
            assert np.allclose(data[::stride], sliced)
def test_set_norms(embeddings_fifu):
    """The Embeddings.norms setter accepts a correctly sized Norms or None
    and rejects everything else with an AssertionError."""
    vocab_len = len(embeddings_fifu.vocab)
    replacement = Norms(np.ones(vocab_len, dtype=np.float32))
    embeddings_fifu.norms = replacement
    assert np.allclose(replacement, embeddings_fifu.norms)
    # Clearing the norms entirely is allowed.
    embeddings_fifu.norms = None
    assert embeddings_fifu.norms is None
    # Wrong type: an arbitrary string.
    with pytest.raises(AssertionError):
        embeddings_fifu.norms = "bla"
    # Wrong type: a bare ndarray instead of a Norms wrapper.
    with pytest.raises(AssertionError):
        embeddings_fifu.norms = np.ones(vocab_len, dtype=np.float32)
    # Wrong length: one entry short.
    with pytest.raises(AssertionError):
        embeddings_fifu.norms = Norms(
            np.ones(vocab_len - 1, dtype=np.float32))
    # Wrong length: one entry too many.
    with pytest.raises(AssertionError):
        embeddings_fifu.norms = Norms(
            np.ones(vocab_len + 1, dtype=np.float32))
    # None of the failed assignments may have taken effect.
    assert embeddings_fifu.norms is None
def load_finalfusion(file: Union[str, bytes, int, PathLike],
                     mmap: bool = False) -> Embeddings:
    """
    Read embeddings from a file in finalfusion format.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to a file with embeddings in finalfusion format.
    mmap : bool
        Toggles memory mapping the storage buffer.

    Returns
    -------
    embeddings : Embeddings
        The embeddings from the input file.

    Raises
    ------
    FinalfusionFormatError
        If a required chunk is missing or an unexpected chunk is found.
    """
    with open(file, 'rb') as inf:
        _ = Header.read_chunk(inf)
        chunk_id, _ = _read_required_chunk_header(inf)
        norms = None
        metadata = None

        # An optional metadata chunk may precede the vocab chunk.
        if chunk_id == ChunkIdentifier.Metadata:
            metadata = Metadata.read_chunk(inf)
            chunk_id, _ = _read_required_chunk_header(inf)

        if chunk_id == ChunkIdentifier.SimpleVocab:
            vocab = SimpleVocab.read_chunk(inf)  # type: Vocab
        elif chunk_id == ChunkIdentifier.BucketSubwordVocab:
            vocab = FinalfusionBucketVocab.read_chunk(inf)
        elif chunk_id == ChunkIdentifier.FastTextSubwordVocab:
            vocab = FastTextVocab.read_chunk(inf)
        elif chunk_id == ChunkIdentifier.ExplicitSubwordVocab:
            vocab = ExplicitVocab.read_chunk(inf)
        else:
            raise FinalfusionFormatError(
                f'Expected vocab chunk, not {str(chunk_id)}')

        chunk_id, _ = _read_required_chunk_header(inf)
        if chunk_id == ChunkIdentifier.NdArray:
            storage = NdArray.load(inf, mmap)  # type: Storage
        elif chunk_id == ChunkIdentifier.QuantizedArray:
            storage = QuantizedArray.load(inf, mmap)
        else:
            raise FinalfusionFormatError(
                f'Expected storage chunk, not {str(chunk_id)}')

        # An optional norms chunk may trail the storage chunk.
        maybe_chunk_id = _read_chunk_header(inf)
        if maybe_chunk_id is not None:
            if maybe_chunk_id[0] == ChunkIdentifier.NdNorms:
                norms = Norms.read_chunk(inf)
            else:
                # BUG FIX: the original interpolated `chunk_id`, which still
                # held the *storage* chunk id here, so the error reported the
                # wrong chunk. Report the chunk that was actually read.
                raise FinalfusionFormatError(
                    f'Expected norms chunk, not {str(maybe_chunk_id[0])}')

        return Embeddings(storage, vocab, norms, metadata, inf.name)
def test_iter_sliced():
    """Iterating a sliced Norms yields the same rows as slicing the ndarray.

    BUG FIX: the original unpacked the zip into ``norms_row, norms_row`` —
    both loop targets were the same name, so the assertion compared a value
    with itself and could never fail. The loop now binds the Norms row and
    the ndarray row to distinct names.
    """
    norms = np.float32(np.random.random_sample(10))
    s = Norms(norms)
    for _ in range(250):
        upper = np.random.randint(-len(norms) * 3, len(norms) * 3)
        lower = np.random.randint(-len(norms) * 3, len(norms) * 3)
        step = np.random.randint(-len(norms) * 3, len(norms) * 3)
        if step == 0:
            # A zero step cannot produce a slice to iterate.
            continue
        for s_row, norms_row in zip(s[lower:upper:step],
                                    norms[lower:upper:step]):
            assert np.allclose(s_row, norms_row)
def test_write_sliced(tmp_path):
    """Randomly sliced Norms survive a write/load round-trip intact."""
    filename = tmp_path / "write_sliced.fifu"
    data = np.float32(np.random.random_sample(10))
    wrapped = Norms(data)
    bound = len(data) * 3
    for _ in range(250):
        stop = np.random.randint(-bound, bound)
        start = np.random.randint(-bound, bound)
        stride = np.random.randint(-bound, bound)
        if stride == 0:
            # A zero step is not a valid slice, so there is nothing to write.
            continue
        wrapped[start:stop:stride].write(filename)
        reloaded = load_norms(filename)
        assert np.allclose(data[start:stop:stride], reloaded)
def test_nonascii_whitespace_text_roundtrip(tmp_path):
    """A vocab word that is non-ASCII whitespace (NO-BREAK SPACE) must
    survive a round-trip through the text format unchanged."""
    words = ["\u00A0"]
    matrix = np.ones((1, 5), dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1)
    matrix /= norms[:, None]
    embeds = Embeddings(NdArray(matrix),
                        SimpleVocab(words),
                        norms=Norms(norms))
    filename = tmp_path / "non-ascii.txt"
    write_text(filename, embeds)
    text = load_text(filename)
    assert embeds.vocab == text.vocab, f'{embeds.vocab.words}{text.vocab.words}'
    assert np.allclose(embeds.storage, text.storage)
    assert np.allclose(embeds.norms, text.norms)
def test_slice_slice():
    """Repeatedly re-slicing a Norms stays in lockstep with re-slicing
    the underlying ndarray, including ValueError on a zero step."""
    for _ in range(250):
        data = np.float32(np.random.random_sample(100))
        view = Norms(data)
        assert np.allclose(data[:], view[:])
        assert np.allclose(data, view)
        for _ in range(5):
            if len(data) == 0:
                # Nothing left to slice (and randint needs a nonempty range).
                break
            span = len(data) * 2
            stop = np.random.randint(-span, span)
            start = np.random.randint(-span, span)
            stride = np.random.randint(-span, span)
            guard = contextlib.suppress() if stride != 0 else pytest.raises(
                ValueError)
            with guard:
                data = data[start:stop:stride]
            with guard:
                view = view[start:stop:stride]
            assert np.allclose(data, view)
def _normalize_matrix(storage: np.ndarray) -> Norms:
    """L2-normalize each row of *storage* in place and return the row norms.

    NOTE(review): a row with zero norm would divide by zero here —
    presumably callers guarantee nonzero rows; confirm at call sites.
    """
    row_norms = np.linalg.norm(storage, axis=1)
    storage /= row_norms[:, None]
    return Norms(row_norms)
def _normalize_ndarray_storage(storage: NdArray) -> Norms:
    """Normalize the rows of an NdArray storage in place; return their norms.

    NOTE(review): like ``_normalize_matrix``, a zero-norm row would divide
    by zero — presumably callers guarantee nonzero rows.
    """
    lengths = np.linalg.norm(storage, axis=1)
    storage /= lengths[:, None]
    return Norms(lengths)