def get_spacy(self, load_from_file=False, model_name='en_core_web_sm'):
    import spacy
    global nlp  # assumes a module-level `nlp = None` cache
    if not nlp:
        #print('>> loading spacy...')
        nlp = spacy.load(model_name)

    doc = None
    if self.parsed and load_from_file:
        #print(self.fnfn_spacy)
        from spacy.tokens.doc import Doc
        try:
            # spaCy 1.x: Doc.read_bytes yields one byte string per doc stored in the file
            for byte_string in Doc.read_bytes(open(self.fnfn_spacy, 'rb')):
                doc = Doc(nlp.vocab)
                doc.from_bytes(byte_string)
        except UnicodeDecodeError:
            print("!! UNICODE ERROR:", self.fnfn_spacy)

    if not doc:
        #print('>> making spacy document for text', self.id)
        txt = self.text
        txt = clean_text(txt)
        doc = nlp(txt)
    return doc
def test_efficient_binary_serialization(doc):
    import spacy.en
    from spacy.tokens.doc import Doc

    # serialize the parsed doc to a single byte string and write it out
    byte_string = doc.to_bytes()
    open('moby_dick.bin', 'wb').write(byte_string)

    # reload: Doc.read_bytes (spaCy 1.x) streams the byte strings back from the file
    nlp = spacy.en.English()
    for byte_string in Doc.read_bytes(open('moby_dick.bin', 'rb')):
        doc = Doc(nlp.vocab)
        doc.from_bytes(byte_string)
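# A minimal invocation sketch, assuming a spaCy 1.x install with the English
# data downloaded; the input sentence is a placeholder, not from the original.
import spacy.en

nlp = spacy.en.English()
doc = nlp(u'All human beings are born free and equal in dignity and rights.')
test_efficient_binary_serialization(doc)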
import pickle

from spacy.tokens.doc import Doc

def read_docs(filepath):
    """Deserialize a list of documents + associated metadata"""
    spacy_parser = get_spacy_parser()  # helper defined elsewhere; returns the shared nlp object
    data = pickle.load(open(filepath, 'rb'))
    for row in data:
        doc = Doc(spacy_parser.vocab)
        # read doc object from serialized byte array
        row['content'] = doc.from_bytes(row.pop('binary_content'))
    return data
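# For context, a matching serializer might look like the sketch below. write_docs
# is a hypothetical counterpart (not in the original) that mirrors the row shape
# read_docs expects: each row is a dict whose 'content' value is a spaCy Doc.
import pickle

def write_docs(filepath, rows):
    data = []
    for row in rows:
        row = dict(row)  # leave the caller's row untouched
        # replace the Doc with its serialized byte array under 'binary_content'
        row['binary_content'] = row.pop('content').to_bytes()
        data.append(row)
    with open(filepath, 'wb') as f:
        pickle.dump(data, f)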
def dependency_labels_to_root(token):
    """Walk up the syntactic tree, collecting the arc labels to the root."""
    dep_labels = []
    while token.head is not token:
        dep_labels.append(token.dep)
        token = token.head
    return dep_labels

# `nlp` and `doc` are assumed to exist from earlier in the session
for sentence in doc.sents:
    for token in sentence:
        print(token)
        print(token.orth)
        dep_labels = dependency_labels_to_root(token)
        print(dep_labels)
        for dep_label in dep_labels:
            print(nlp.vocab.strings[dep_label])

doc = nlp(u"Mr. Best flew to New York on Saturday morning.")
for ent in doc.ents:
    print(ent, ent.label_, ent.orth_)
    print(ent.root, ent.root.head, ent.root.head.pos,
          nlp.vocab.strings[ent.root.head.pos], ent.root.head.lemma_)

from spacy.tokens.doc import Doc
byte_string = doc.to_bytes()
open('moby_dick.bin', 'wb').write(byte_string)

for byte_string in Doc.read_bytes(open('moby_dick.bin', 'rb')):
    doc = Doc(nlp.vocab)  # a fresh Doc per byte string; from_bytes fills an empty doc
    doc.from_bytes(byte_string)
print(doc)
from spacy.tokens.doc import Doc

def read_doc(spacy_fname, nlp):
    print('reading ' + spacy_fname)
    # the file holds exactly one serialized doc, written as a single byte string
    byte_string = open(spacy_fname, 'rb').read()
    doc = Doc(nlp.vocab)
    doc.from_bytes(byte_string)
    return doc
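# A minimal counterpart producing the file read_doc consumes, assuming the
# one-doc-per-file layout above; write_doc is a hypothetical name.
def write_doc(spacy_fname, doc):
    with open(spacy_fname, 'wb') as f:
        f.write(doc.to_bytes())  # whole doc as one byte string, read back via .read()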