Esempi in Python per Token.nbor

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: spacy.tokens

Classe/tipologia: Token

Metodo/funzione: nbor

Esempi su hotexamples.com: 4

Token.nbor in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per spacy.tokens.Token.nbor, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

set_extension(30)

has_extension(23)

get_extension(8)

nbor(4)

remove_extension(4)

pos(2)

Token(1)

check_flag(1)

Esempio n. 1

Mostra file

def is_likely_proper(tok: Token, min_rank=200) -> bool:
    """Heuristically decide, from its form, whether a spaCy token is a proper name.

    The rules below are tried in order and the first one that matches decides
    the outcome; a token that matches none of them is not a proper name.

    NB: only meaningful for bicameral scripts, i.e. languages that distinguish
    lowercase from uppercase letters.

    Args:
        tok: the token to inspect.
        min_rank: vocabulary rank below which a lowercase lemma is treated as
            a common word and rejected.

    Returns:
        True if the token looks like a proper name, False otherwise.
    """
    # Tokens shorter than two characters are never accepted
    if len(tok) < 2:
        return False

    lemma = tok.lemma_
    has_long_lemma = len(lemma) > 2

    # Title-cased or fully uppercase lemmas (the "-PRON-" placeholder aside)
    if has_long_lemma and lemma.istitle():
        return True
    if has_long_lemma and lemma.isupper() and lemma != "-PRON-":
        return True

    # No lemma available: fall back on the casing of the token itself
    if lemma == "" and tok.is_upper:
        return True

    # Lowercase lemmas ranked below min_rank in the vocabulary are too common
    if lemma.islower() and tok.lemma in tok.vocab.strings:
        if tok.vocab[tok.lemma].rank < min_rank:
            return False

    if len(tok) > 2:
        surface = tok.text
        first, rest = surface[0], surface[1:]

        # Lowercase letter directly followed by a capital, e.g. iPad
        if first.islower() and rest[0].isupper():
            return True

        # Leading capital followed by a mix of both cases, e.g. IceFog
        if (first.isupper() and any(ch.islower() for ch in rest)
                and any(ch.isupper() for ch in rest)):
            return True

    # Title-cased token that is not sentence-initial and does not directly
    # follow a quote, newline, pipe or a token ending with a full stop.
    # NB: This should be commented out for languages such as German
    if tok.i > 0 and tok.is_title and not tok.is_sent_start:
        blocking_predecessors = {'\'', '"', '‘', '“', '”', '’', "\n", "|"}
        previous_text = tok.nbor(-1).text
        if (previous_text not in blocking_predecessors
                and not previous_text.endswith(".")):
            return True

    # The tagger already labelled the token as a proper noun
    if tok.pos_ == "PROPN":
        return True

    # Longer lowercase token that is out-of-vocabulary although the
    # vocabulary has vectors loaded
    return bool(len(tok) > 3 and tok.is_lower
                and len(tok.vocab.vectors) > 0 and tok.is_oov)

Esempio n. 2

Mostra file

    def _get_lookahead(self, token: Token, next_sentence_boundary: int) -> int:
        """Compute how many tokens a span starting at ``token`` may cover.

        The result is 0 when no span may start here, which is the case when:
        - the token is a punctuation symbol, or
        - the preceding token is a "compound" dependent whose head is this
          token (the span would start inside a compound phrase).

        Otherwise the result is the distance from the token to
        ``next_sentence_boundary``, capped at ``self.lookahead``, so that the
        span never crosses a sentence boundary.
        """
        if token.is_punct:
            return 0

        if token.i > 0:
            previous = token.nbor(-1)
            if previous.dep_ == "compound" and previous.head == token:
                return 0

        distance_to_boundary = next_sentence_boundary - token.i
        return min(distance_to_boundary, self.lookahead)

Esempio n. 3

Mostra file

def in_compound(tok: Token):
    """Tell whether the spaCy token belongs to a compound phrase.

    A token qualifies when its own dependency label is "compound", or when it
    is not the first token of the document and the token immediately before
    it carries the "compound" label.
    """
    is_modifier = tok.dep_ == "compound"
    return is_modifier or (tok.i > 0 and tok.nbor(-1).dep_ == "compound")

Esempio n. 4

Mostra file

def last_token_of_entity(doc: Doc, token: Token) -> Token:
    """Return the last token of the entity that ``token`` belongs to.

    Starting from ``token``, walk to the right for as long as the following
    token's IOB code is 1 (Inside), and return the last token reached.
    Example: HMGB1-induced -> induced

    The walk is iterative, so long entities cannot exhaust the recursion
    limit, and an entity that runs up to the final token of the document ends
    on that token instead of raising ``IndexError``.

    Args:
        doc: the document containing ``token``. Not used by the walk itself;
            kept so existing callers keep working.
        token: a token inside an entity.

    Returns:
        The right-most token reached before the IOB code stops being Inside.
    """
    current = token
    while True:
        try:
            following = current.nbor()
        except IndexError:
            # No token to the right: the entity ends with the document.
            return current
        if following.ent_iob != 1:
            return current
        current = following