def _process_parser(self, sentences, input_pack: DataPack):
    """Annotate sentences, tokens, POS tags and lemmas from spaCy output.

    Default behaviour is to segment sentences, then (depending on the
    configured processors) tokenize, POS-tag and lemmatize.

    Args:
        sentences: Iterable of spaCy sentence spans over the document.
        input_pack: The data pack the annotation entries are added to.
    """
    for sent in sentences:
        # Constructing the entry registers it in the pack; the reference
        # itself is not needed afterwards.
        Sentence(input_pack, sent.start_char, sent.end_char)

        if "tokenize" not in self.processors:
            continue

        # spaCy tokens expose their document-level character offset via
        # `idx`; the end offset is derived from the surface form length.
        for spacy_token in sent:
            start = spacy_token.idx
            end = start + len(spacy_token.text)
            entry = Token(input_pack, start, end)
            if "pos" in self.processors:
                entry.pos = spacy_token.tag_
            if "lemma" in self.processors:
                entry.lemma = spacy_token.lemma_
def _process(self, input_pack: DataPack):
    """Annotate the pack's text with stanfordnlp pipeline output.

    Adds ``Sentence`` entries, and — depending on the configured
    processors — ``Token`` entries (with POS/xpos and lemma) and
    ``Dependency`` relations between tokens.

    Args:
        input_pack: The data pack whose ``text`` is processed and to
            which all annotation entries are added.
    """
    doc = input_pack.text
    if not doc:
        # Best-effort: warn but still run the pipeline on empty text,
        # matching the previous behaviour.
        logging.warning("Find empty text in doc.")

    # sentence parsing
    sentences = self.nlp(doc).sentences

    # Iterating through stanfordnlp sentence objects
    for sentence in sentences:
        Sentence(
            input_pack,
            sentence.tokens[0].start_char,
            sentence.tokens[-1].end_char,
        )

        tokens: List[Token] = []
        if "tokenize" in self.processors:
            # Iterating through stanfordnlp word objects
            for word in sentence.words:
                # `misc` is a "|"-separated list of key=value pairs,
                # e.g. "start_char=0|end_char=5". Use partition() so an
                # entry without "=" is skipped instead of raising an
                # unpacking ValueError as the old split("=") did.
                t_start = -1
                t_end = -1
                for item in word.misc.split("|"):
                    key, sep, value = item.partition("=")
                    if not sep:
                        continue
                    if key == "start_char":
                        t_start = int(value)
                    elif key == "end_char":
                        t_end = int(value)

                if t_start < 0 or t_end < 0:
                    raise ValueError(
                        "Cannot determine word start or end for "
                        "stanfordnlp."
                    )

                token = Token(input_pack, t_start, t_end)
                if "pos" in self.processors:
                    token.pos = word.pos
                    token.ud_xpos = word.xpos
                if "lemma" in self.processors:
                    token.lemma = word.lemma
                tokens.append(token)

        # For each sentence, get the dependency relations among tokens
        if "depparse" in self.processors:
            # Iterating through token entries in current sentence
            for token, word in zip(tokens, sentence.words):
                child = token  # current token
                # NOTE(review): `head` is a 1-based index and 0 marks the
                # sentence root, so `head - 1` resolves to tokens[-1]
                # (the last token) for the root word — presumably
                # unintended; confirm desired root handling before
                # changing. Behaviour kept as-is here.
                parent = tokens[word.head - 1]  # Head token
                relation_entry = Dependency(input_pack, parent, child)
                relation_entry.rel_type = word.deprel
def _process(self, input_pack: DataPack):
    """Annotate the pack's text with sentences, tokens, POS tags,
    lemmas and dependency relations from the stanfordnlp pipeline.

    Character offsets are recovered by searching the raw text for each
    word's surface form, since this pipeline version does not expose
    start/end offsets directly.

    Args:
        input_pack: The data pack whose ``text`` is processed and to
            which all annotation entries are added.
    """
    doc = input_pack.text
    # Running cursor into `doc`: each sentence search starts after the
    # previous sentence's end, so repeated surface forms are not
    # re-matched from the beginning of the document.
    end_pos = 0

    # sentence parsing
    sentences = self.nlp(doc).sentences  # type: ignore

    # Iterating through stanfordnlp sentence objects
    for sentence in sentences:
        # Locate the sentence span via its first and last words.
        # NOTE(review): str.find returns -1 on a miss (e.g. if the
        # tokenizer normalized a surface form), which would corrupt the
        # span — verify inputs cannot trigger this.
        begin_pos = doc.find(sentence.words[0].text, end_pos)
        end_pos = doc.find(sentence.words[-1].text, begin_pos) + len(
            sentence.words[-1].text)
        sentence_entry = Sentence(input_pack, begin_pos, end_pos)

        tokens: List[Token] = []
        if "tokenize" in self.processors:
            # Word offsets below are found within the sentence text;
            # add this offset to map them back to document coordinates.
            offset = sentence_entry.span.begin
            # Cursor within the sentence text, advanced past each
            # matched word so duplicates resolve to the next occurrence.
            end_pos_word = 0
            # Iterating through stanfordnlp word objects
            for word in sentence.words:
                begin_pos_word = sentence_entry.text. \
                    find(word.text, end_pos_word)
                end_pos_word = begin_pos_word + len(word.text)
                token = Token(input_pack,
                              begin_pos_word + offset,
                              end_pos_word + offset)
                if "pos" in self.processors:
                    token.pos = word.pos
                    token.ud_xpos = word.xpos
                if "lemma" in self.processors:
                    token.lemma = word.lemma
                tokens.append(token)

        # For each sentence, get the dependency relations among tokens
        if "depparse" in self.processors:
            # Iterating through token entries in current sentence
            for token, word in zip(tokens, sentence.words):
                child = token  # current token
                # `governor` is the 1-based index of this word's head
                # (the original comment said "Root token"; it is the
                # head token). NOTE(review): governor == 0 marks the
                # sentence root, so governor - 1 indexes tokens[-1]
                # (the last token) as parent — presumably unintended;
                # confirm root handling.
                parent = tokens[word.governor - 1]  # Head token
                relation_entry = Dependency(input_pack, parent, child)
                relation_entry.rel_type = word.dependency_relation