def __call__(self, doc: Doc):
    """Adjust sentence boundaries on *doc* using the component's matchers.

    Three passes:
      1. ``split_matcher``: force a sentence start at the last token of
         each match, clearing a boundary immediately before it so no
         empty one-token sentence is created.
      2. ``join_matcher``: clear any sentence starts falling inside a
         match, fusing the sentences it spans.
      3. If the doc is sentenced, shift each sentence start forward past
         leading whitespace tokens.

    Returns the same *doc*, mutated in place.
    """
    # Temporarily clear is_parsed so is_sent_start assignments are allowed.
    save_parsed = doc.is_parsed
    doc.is_parsed = False
    if self.split_matcher:
        matches = self.split_matcher(doc)
        for match_id, start, end in matches:
            # New sentence begins at the final token of the match.
            token = doc[end - 1]
            token.is_sent_start = True
            # Remove an immediately-preceding boundary that would yield
            # a degenerate single-token sentence.
            if end - 2 >= 0 and doc[end - 2].is_sent_start is True:
                doc[end - 2].is_sent_start = False
    if self.join_matcher:
        matches = self.join_matcher(doc)
        for match_id, start, end in matches:
            # If there is a sent start in the match, just remove it
            for token in doc[start:end]:
                if token.is_sent_start:
                    token.is_sent_start = False
    if doc.is_sentenced:
        # Trim starting spaces: move each boundary forward past leading
        # whitespace tokens (but never past the whole sentence).
        for sent in doc.sents:
            sentlen = len(sent)
            first_non_space = 0
            while first_non_space < sentlen and sent[first_non_space].is_space:
                first_non_space += 1
            if 0 < first_non_space < sentlen:
                sent[0].is_sent_start = False
                sent[first_non_space].is_sent_start = True
    # Restore the saved flag; if boundary edits left the doc unsentenced,
    # mark it parsed so downstream components treat boundaries as final.
    doc.is_parsed = save_parsed if doc.is_sentenced else True
    return doc
def __call__(self, doc : Doc):
    """Apply split/join matcher rules to the sentence boundaries of *doc*.

    Split matches force a sentence start at their last token (removing a
    directly preceding boundary); join matches erase every sentence start
    inside their span. The doc is mutated in place and returned.
    """
    # Remember the parsed flag; it must be off while we edit boundaries.
    previously_parsed = doc.is_parsed
    doc.is_parsed = False
    if self.split_matcher:
        for match_id, start, end in self.split_matcher(doc):
            last_token = doc[end - 1]
            last_token.is_sent_start = True
            before = end - 2
            # Avoid a one-token sentence right before the new boundary.
            if before >= 0 and doc[before].is_sent_start is True:
                doc[before].is_sent_start = False
    if self.join_matcher:
        for match_id, start, end in self.join_matcher(doc):
            # If there is a sent start in the match, just remove it
            for tok in doc[start:end]:
                if tok.is_sent_start:
                    tok.is_sent_start = False
    doc.is_parsed = previously_parsed if doc.is_sentenced else True
    return doc
def load_and_transform(batch_id, in_loc, out_dir):
    """Deserialize one batch of spaCy Docs from *in_loc* and write the
    transformed text of each to ``<out_dir>/<batch_id>.txt``.

    Returns None immediately when the output file already exists, so the
    batch job can be resumed without redoing work.
    """
    out_loc = path.join(out_dir, '%d.txt' % batch_id)
    if path.exists(out_loc):
        # Output from a previous run — skip this batch.
        return None
    print('Batch', batch_id)
    # Pipeline components are disabled: only the vocab is needed to
    # reconstruct Docs from their serialized bytes.
    nlp = spacy.en.English(parser=False, tagger=False, matcher=False,
                           entity=False)
    with io.open(out_loc, 'w', encoding='utf8') as out_file, \
            io.open(in_loc, 'rb') as in_file:
        for byte_string in Doc.read_bytes(in_file):
            doc = Doc(nlp.vocab).from_bytes(byte_string)
            doc.is_parsed = True
            out_file.write(transform_doc(doc))