def unserialize_doc(nlp, serialized_string):
    """Rebuild a ``Doc`` from a base64-encoded pickle payload.

    The payload is expected to unpickle to a mapping that is indexed with
    the module-level keys ``_DOC_BYTE_STRING``, ``_USER_DATA`` and ``_HASH``.

    Args:
        nlp: Object whose ``vocab`` attribute is used to construct the ``Doc``.
        serialized_string: Base64 data (``str`` or ``bytes``) wrapping the
            pickled mapping.

    Returns:
        The ``Doc`` loaded from the stored bytes, with ``user_data`` set to
        the stored user data.

    Raises:
        AssertionError: If ``str(hash_string(doc.string))`` does not equal
            the hash stored in the payload.
    """
    # Local import: the file's import block is not visible from this chunk.
    import base64

    # ``str.decode('base64')`` exists only on Python 2; ``b64decode`` accepts
    # both ASCII text and bytes on Python 2 and Python 3.
    raw_pickle = base64.b64decode(serialized_string)

    # SECURITY: ``pickle.loads`` can execute arbitrary code while loading.
    # Only pass data from a trusted source to this function. The hash check
    # below runs *after* unpickling, so it does not protect against this.
    value = pickle.loads(raw_pickle)

    doc_byte_string = value[_DOC_BYTE_STRING]
    user_data = value[_USER_DATA]
    doc_hash = value[_HASH]

    doc = Doc(nlp.vocab).from_bytes(doc_byte_string)

    # Raised explicitly instead of via ``assert`` so the integrity check is
    # still enforced when Python runs with -O (which strips assert statements).
    if str(hash_string(doc.string)) != doc_hash:
        raise AssertionError("the hash doesn't match the hash")

    doc.user_data = user_data
    return doc
def __call__(self, text):
    """Build a ``Doc`` from an indexable ``text`` of pre-split parts.

    Args:
        text: Indexable where ``text[0]`` is passed to ``Doc`` as ``words``,
            ``text[1]`` as ``spaces``, and ``text[2]`` is stored as the
            resulting document's ``user_data``.

    Returns:
        The newly constructed ``Doc`` bound to ``self.vocab``.
    """
    vocab = self.vocab
    words, spaces = text[0], text[1]
    doc = Doc(vocab, words=words, spaces=spaces)
    # User data is attached after construction, as a plain attribute.
    doc.user_data = text[2]
    return doc