Example #1
from pythainlp.tag import pos_tag
from pythainlp.tokenize import word_tokenize


def sentence_similarity(sentence1, sentence2):
    """Compute the sentence similarity using WordNet."""
    # Tokenize and POS-tag both sentences
    sentence1 = pos_tag(word_tokenize(sentence1), 'artagger')
    sentence2 = pos_tag(word_tokenize(sentence2), 'artagger')

    # Get the synsets for the tagged words
    synsets1 = [tagged_to_synset(*tagged_word) for tagged_word in sentence1]
    synsets2 = [tagged_to_synset(*tagged_word) for tagged_word in sentence2]

    # Filter out the Nones
    synsets1 = [ss for ss in synsets1 if ss]
    synsets2 = [ss for ss in synsets2 if ss]

    score, count = 0.0, 0

    # For each word in the first sentence
    for synset in synsets1:
        # Get the similarity value of the most similar word in the other sentence,
        # skipping pairs for which WordNet cannot compute a path similarity
        similarities = [sim for sim in (synset.path_similarity(ss) for ss in synsets2)
                        if sim is not None]
        best_score = max(similarities) if similarities else None

        # Check that the similarity could have been computed
        if best_score is not None:
            score += best_score
            count += 1

    # Average the values
    if count != 0:
        score /= count
    return score
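Example #1 depends on a tagged_to_synset helper that is not part of the snippet. A minimal sketch of such a helper, assuming NLTK's WordNet corpus and Penn-Treebank-style tag prefixes (the tag mapping and the choice of the first synset are assumptions, not the original implementation), could look like this:

from nltk.corpus import wordnet as wn

def penn_to_wn(tag):
    # Map a Penn-Treebank-style tag prefix to a WordNet POS constant
    if tag.startswith('N'):
        return wn.NOUN
    if tag.startswith('V'):
        return wn.VERB
    if tag.startswith('J'):
        return wn.ADJ
    if tag.startswith('R'):
        return wn.ADV
    return None

def tagged_to_synset(word, tag):
    # Return the first matching synset for the word, or None if there is none
    wn_tag = penn_to_wn(tag)
    if wn_tag is None:
        return None
    synsets = wn.synsets(word, wn_tag)
    return synsets[0] if synsets else None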
Example #2
def change_word_tokenize(self, name):
    if self.dictlist == []:
        self.words = WordList(
            pythainlp.word_tokenize(self.text, engine=name))
    else:
        self.words = WordList(pythainlp.word_tokenize(
            self.text, self.dict))
    self.tags = pythainlp.pos_tag(self.words)
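change_word_tokenize passes the engine name straight through to pythainlp.word_tokenize. A short usage sketch with two standard PyThaiNLP engines ('newmm' and 'longest'); the surrounding class is not shown here:

import pythainlp

text = "ผมชื่อต้นตาล"
print(pythainlp.word_tokenize(text, engine="newmm"))
print(pythainlp.word_tokenize(text, engine="longest"))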
Example #3
def __init__(self, text, dictlist=[]):
    self.text = text
    self.dictlist = dictlist
    if self.dictlist == []:
        # No custom dictionary: tokenize with the default engine
        self.words = WordList(pythainlp.word_tokenize(self.text))
    else:
        # Build a trie from the custom word list and tokenize against it
        self.dict = pythainlp.tokenize.dict_trie(self.dictlist)
        self.words = WordList(pythainlp.word_tokenize(
            self.text, self.dict))
    self.tags = pythainlp.pos_tag(self.words)
    self.romanize = [romanize_pythainlp(i) for i in self.words]
    self.word_counts = Counter(self.words)
Example #4
    def tokenize(self, document):
        # Break the document into sentences
        for sent in sent_tokenize(document):
            # Break the sentence into part of speech tagged tokens
            for token, tag in pos_tag(wordpunct_tokenize(sent), 'artagger'):
                # Apply preprocessing to the token
                token = token.lower() if self.lower else token
                token = token.strip() if self.strip else token
                token = token.strip('_') if self.strip else token
                token = token.strip('*') if self.strip else token

                # If stopword, ignore token and continue
                # if token in self.stopwords:
                #     continue

                # If punctuation, ignore token and continue
                if all(char in self.punct for char in token):
                    continue

                # Lemmatize the token and yield
                lemma = self.lemmatize(token, tag)
                yield lemma
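The tokenize generator above also relies on a self.lemmatize helper that is not shown. A minimal sketch, assuming NLTK's WordNetLemmatizer and the same Penn-style tag prefixes as in Example #1 (both assumptions about the original class), might be:

from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

def lemmatize(self, token, tag):
    # Map the tag prefix to a WordNet POS, falling back to noun
    pos = {'N': wn.NOUN, 'V': wn.VERB, 'J': wn.ADJ, 'R': wn.ADV}.get(tag[:1], wn.NOUN)
    return WordNetLemmatizer().lemmatize(token, pos)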
Example #5
def change_pos_tag(self, name):
    self.tags = pythainlp.pos_tag(self.words, engine=name)
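change_pos_tag simply forwards the engine name to pythainlp.pos_tag. As a usage sketch with two taggers that ship with current PyThaiNLP releases ('perceptron' is the default, 'unigram' is an alternative):

from pythainlp.tag import pos_tag
from pythainlp.tokenize import word_tokenize

words = word_tokenize("ผมชื่อต้นตาล ผมเล่นเกม")
print(pos_tag(words, engine="perceptron"))
print(pos_tag(words, engine="unigram"))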
Example #6
from pythainlp.tokenize import sent_tokenize, word_tokenize
from pythainlp import pos_tag
text = "ผมชื่อต้นตาล ผมอายุ 40 ปี ผมเล่นเกม"  # "My name is Tontan. I am 40 years old. I play games."
#print(text)
sent = sent_tokenize(text)
print("จำนวนประโยค : {}".format(len(sent)))  # "จำนวนประโยค" = number of sentences
#print(sent)
for i in range(0, len(sent)):
    print("Sentence {} is '{}'".format(
        i + 1, str(pos_tag(word_tokenize(sent[i]), corpus='orchid_ud'))))
#print(txt.split("\n"))