Python Vocab.to_bytes Exemples

Langage de programmation: Python

Espace de noms/Paquet : spacy.vocab

Class/Type: Vocab

Méthode/Fonction: to_bytes

Exemples sur hotexamples.com : 12

Python Vocab.to_bytes - 12 exemples trouvés. Ce sont les exemples réels les mieux notés de spacy.vocab.Vocab.to_bytes extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

Vocab(30)

set_vector(21)

to_bytes(9)

from_bytes(7)

to_disk(7)

from_dir(5)

dump(4)

from_disk(4)

prune_vectors(2)

reset_vectors(2)

vectors(2)

deduplicate_vectors(1)

load(1)

Méthodes fréquemment utilisées

Vocab (30)

set_vector (21)

to_bytes (9)

from_bytes (7)

to_disk (7)

from_dir (5)

dump (4)

from_disk (4)

prune_vectors (2)

reset_vectors (2)

Méthodes fréquemment utilisées

vectors (2)

deduplicate_vectors (1)

load (1)

Exemple #1

0

Afficher le fichier

Fichier : test_vectors.py Projet : richardpaulhudson/spaCy

def test_vectors_deduplicate():
    """Check that ``Vocab.deduplicate_vectors`` collapses duplicate rows.

    Five keys are attached to a table in which two of the vectors occur
    twice. Before deduplication every key maps to its own row; afterwards
    the table holds three rows, the keys of identical vectors share a row,
    and a second deduplication leaves the serialized vocab unchanged.
    """
    rows = OPS.asarray([[1, 1], [2, 2], [3, 4], [1, 1], [3, 4]], dtype="f")
    vecs = Vectors(data=rows, keys=["a1", "b1", "c1", "a2", "c2"])
    vocab = Vocab()
    vocab.vectors = vecs

    def row_of(key):
        # Look the key up through the vectors' string store, then find the
        # row it currently maps to in the vocab's vector table.
        return vocab.vectors.key2row[vecs.strings[key]]

    # Duplicate vectors start out on different rows.
    assert row_of("a1") != row_of("a2")
    assert row_of("c1") != row_of("c2")

    vocab.deduplicate_vectors()

    # Three unique vectors remain.
    assert vocab.vectors.shape[0] == 3

    # Uniquing the deduplicated data must not change it.
    uniqued = numpy.unique(OPS.to_numpy(vocab.vectors.data), axis=0)
    deduplicated = OPS.to_numpy(vocab.vectors.data)
    assert_equal(uniqued, deduplicated)

    # Duplicate vectors now share a row.
    assert row_of("a1") == row_of("a2")
    assert row_of("c1") == row_of("c2")

    # Deduplicating a second time makes no further changes.
    before = vocab.to_bytes()
    vocab.deduplicate_vectors()
    assert before == vocab.to_bytes()

Exemple #2

0

Afficher le fichier

Fichier : test_lookups.py Projet : cisco00/Sentimental-Analysis-on-threat

def test_lookups_to_from_bytes_via_vocab():
    """Check that lookup tables survive a ``Vocab`` bytes round trip.

    A table with two entries is added to one vocab's lookups, the vocab is
    serialized, and the bytes are loaded into a fresh vocab. The fresh vocab
    must expose the same table and serialize back to identical bytes.
    """
    table_name = "test"
    entries = {"foo": "bar", "hello": "world"}

    source = Vocab()
    source.lookups.add_table(table_name, entries)
    assert table_name in source.lookups
    serialized = source.to_bytes()

    restored = Vocab()
    restored.from_bytes(serialized)

    # Same number of tables, and the named table made it across.
    assert len(restored.lookups) == len(source.lookups)
    assert table_name in restored.lookups

    restored_table = restored.lookups.get_table(table_name)
    assert len(restored_table) == 2
    assert restored_table["hello"] == "world"

    # Re-serializing the restored vocab reproduces the original payload.
    assert restored.to_bytes() == serialized

Exemple #3

0

Afficher le fichier

def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
    """Round-trip two vocabs through bytes and compare the payloads.

    The serialized forms must be equal exactly when the two input string
    lists are equal. Loading a payload, either back into the vocab that
    produced it or into a fresh ``Vocab``, must reproduce the same bytes,
    and the restored string store must hold the inputs plus ``"_SP"``.
    """
    vocab1 = Vocab(strings=strings1)
    vocab2 = Vocab(strings=strings2)
    payload1 = vocab1.to_bytes()
    payload2 = vocab2.to_bytes()

    # Payload equality must mirror input equality.
    inputs_match = strings1 == strings2
    assert (payload1 == payload2) == inputs_match

    # Reloading into the originating vocab is stable.
    vocab1 = vocab1.from_bytes(payload1)
    assert vocab1.to_bytes() == payload1

    # Loading into a brand-new vocab is stable too.
    fresh = Vocab().from_bytes(payload1)
    assert fresh.to_bytes() == payload1

    # The string store gains one extra entry: "_SP".
    assert len(fresh.strings) == len(strings1) + 1
    assert sorted(fresh.strings) == sorted(strings1 + ["_SP"])

Exemple #4

0

Afficher le fichier

def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
    """Round-trip two vocabs through bytes and compare the payloads.

    The serialized forms must differ exactly when the two input string
    lists differ. Loading a payload into the originating vocab or into a
    fresh ``Vocab`` must reproduce the same bytes, and the fresh vocab must
    contain one lexeme per input string.
    """
    vocab1 = Vocab(strings=strings1)
    vocab2 = Vocab(strings=strings2)
    first_bytes = vocab1.to_bytes()
    second_bytes = vocab2.to_bytes()

    if strings1 != strings2:
        assert first_bytes != second_bytes
    else:
        assert first_bytes == second_bytes

    # Reload in place and confirm the serialization is unchanged.
    vocab1 = vocab1.from_bytes(first_bytes)
    assert vocab1.to_bytes() == first_bytes

    # Load the same payload into an empty vocab.
    reloaded = Vocab().from_bytes(first_bytes)
    assert reloaded.to_bytes() == first_bytes

    # The lexemes in the reloaded vocab are exactly the input strings.
    assert len(reloaded) == len(strings1)
    lexeme_texts = [lexeme.text for lexeme in reloaded]
    assert sorted(lexeme_texts) == sorted(strings1)

Exemple #5

0

Afficher le fichier

Fichier : test_serialize_vocab_strings.py Projet : spacy-io/spaCy

def test_serialize_vocab_roundtrip_bytes(strings1, strings2):
    """Verify byte serialization of ``Vocab`` is stable and input-dependent.

    Two vocabs built from ``strings1`` and ``strings2`` must serialize to
    equal bytes if and only if the inputs are equal. The first payload is
    then loaded back into its own vocab and into a new one; both must
    re-serialize identically, and the new vocab's lexemes must match
    ``strings1``.
    """
    vocab1 = Vocab(strings=strings1)
    vocab2 = Vocab(strings=strings2)
    blob1 = vocab1.to_bytes()
    blob2 = vocab2.to_bytes()

    # Equal inputs give equal bytes; unequal inputs give unequal bytes.
    expect_equal = strings1 == strings2
    assert (blob1 == blob2) == expect_equal

    vocab1 = vocab1.from_bytes(blob1)
    assert vocab1.to_bytes() == blob1

    new_vocab1 = Vocab().from_bytes(blob1)
    assert new_vocab1.to_bytes() == blob1

    # One lexeme per input string, with matching texts.
    assert len(new_vocab1) == len(strings1)
    assert sorted(lex.text for lex in new_vocab1) == sorted(strings1)

Exemple #6

0

Afficher le fichier

def test_serialize_vocab_lex_attrs_bytes(strings, lex_attr):
    """Check that a lexeme's ``norm_`` attribute survives a bytes round trip.

    ``norm_`` is set on the first string's lexeme in one vocab. A second,
    empty vocab must not share that value until it is loaded from the first
    vocab's serialized bytes.
    """
    word = strings[0]
    source = Vocab(strings=strings)
    target = Vocab()

    source[word].norm_ = lex_attr
    assert source[word].norm_ == lex_attr

    # The untouched vocab does not have the custom norm yet.
    assert target[word].norm_ != lex_attr

    # After loading the serialized source, it does.
    target = target.from_bytes(source.to_bytes())
    assert target[word].norm_ == lex_attr

Exemple #7

0

Afficher le fichier

Fichier : test_serialize_vocab_strings.py Projet : spacy-io/spaCy

def test_serialize_vocab_lex_attrs_bytes(strings, lex_attr):
    """Verify lexeme attributes are carried by ``Vocab.to_bytes``.

    The ``norm_`` of the lexeme for ``strings[0]`` is overridden in one
    vocab. A separate vocab must only report that value once it has been
    populated with ``from_bytes`` from the first vocab's payload.
    """
    first_string = strings[0]
    vocab1 = Vocab(strings=strings)
    vocab2 = Vocab()

    # Override the norm in vocab1 only.
    vocab1[first_string].norm_ = lex_attr
    assert vocab1[first_string].norm_ == lex_attr
    assert vocab2[first_string].norm_ != lex_attr

    # Transfer vocab1's state to vocab2 through bytes.
    payload = vocab1.to_bytes()
    vocab2 = vocab2.from_bytes(payload)
    assert vocab2[first_string].norm_ == lex_attr

Exemple #8

0

Afficher le fichier

def test_pickle_vocab(strings, lex_attr):
    """Check that a ``Vocab`` with floret-mode vectors survives pickling.

    The vocab gets a 10x10 zero vector table created with ``mode="floret"``
    and a custom ``norm_`` on its first string. The unpickled copy must
    serialize to the same bytes and still report the floret vectors mode.
    """
    vocab = Vocab(strings=strings)
    ops = get_current_ops()
    zero_table = ops.xp.zeros((10, 10))
    vocab.vectors = Vectors(data=zero_table, mode="floret", hash_count=1)
    vocab[strings[0]].norm_ = lex_attr

    # Round-trip through pickle; the data is produced by this test itself.
    restored = pickle.loads(pickle.dumps(vocab))

    assert vocab.to_bytes() == restored.to_bytes()
    assert restored.vectors.mode == "floret"

Exemple #9

0

Afficher le fichier

def test_deserialize_vocab_seen_entries(strings, lex_attr):
    """Check that reloading a vocab from its own bytes adds no duplicates.

    Reported in #2153. After loading the vocab's serialized form back into
    the same vocab, the string store must hold the input strings plus one
    extra entry ("_SP").
    """
    vocab = Vocab(strings=strings)
    serialized = vocab.to_bytes()
    vocab.from_bytes(serialized)

    expected_count = len(strings) + 1  # adds _SP
    assert len(vocab.strings) == expected_count

Exemple #10

0

Afficher le fichier

def test_serialize_vocab(en_vocab, text):
    """Check that a string added to a vocab survives a bytes round trip.

    ``text`` is added to ``en_vocab``'s string store and the vocab is
    serialized with the lookups excluded. A fresh ``Vocab`` loaded from
    those bytes must resolve the same hash back to ``text`` and re-serialize
    to an identical payload under the same exclusion.
    """
    key = en_vocab.strings.add(text)
    serialized = en_vocab.to_bytes(exclude=["lookups"])

    restored = Vocab().from_bytes(serialized)

    assert restored.strings[key] == text
    assert restored.to_bytes(exclude=["lookups"]) == serialized

Exemple #11

0

Afficher le fichier

def test_pickle_vocab(strings, lex_attr):
    """Check that a ``Vocab`` with a custom lexeme attribute survives pickling.

    ``norm_`` is set on the lexeme for the first string; the unpickled copy
    must serialize to the same bytes as the original vocab.
    """
    vocab = Vocab(strings=strings)
    vocab[strings[0]].norm_ = lex_attr

    # Round-trip through pickle; the data is produced by this test itself.
    dumped = pickle.dumps(vocab)
    restored = pickle.loads(dumped)

    assert vocab.to_bytes() == restored.to_bytes()

Exemple #12

0

Afficher le fichier

Fichier : test_serialize_vocab_strings.py Projet : spacy-io/spaCy

def test_deserialize_vocab_seen_entries(strings, lex_attr):
    """Check that reloading a vocab from its own bytes keeps its length.

    Reported in #2153. Entries the vocab already contains must not be added
    again when its serialized form is loaded back into the same vocab.
    """
    vocab = Vocab(strings=strings)
    size_before = len(vocab)

    serialized = vocab.to_bytes()
    vocab.from_bytes(serialized)

    assert len(vocab) == size_before