from nltk.corpus import ptb
from nltk.tag.mapping import _UNIVERSAL_TAGS

# Position of each universal POS tag in the per-word count vector.
tag_to_index = {tag: i for i, tag in enumerate(_UNIVERSAL_TAGS)}


def get_word_to_posvec():
    # Count, per word in the PTB 'news' files, occurrences of each universal tag.
    word_to_posvec = {}
    for fileid in ptb.fileids('news'):
        for (word, tag) in ptb.tagged_words(fileid, tagset='universal'):
            if word not in word_to_posvec:
                word_to_posvec[word] = [0] * len(_UNIVERSAL_TAGS)
            word_to_posvec[word][tag_to_index[tag]] += 1
    return word_to_posvec
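A quick usage sketch, assuming the PTB corpus is installed locally; the argmax over a word's count vector recovers its most frequent universal tag ('form', for instance, is a word the WSJ_0003 tests below expect):

word_to_posvec = get_word_to_posvec()
vec = word_to_posvec['form']
# The index with the highest count is the word's most common tag.
best = max(range(len(vec)), key=vec.__getitem__)
print(_UNIVERSAL_TAGS[best], vec)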
Example #2
def test_tagged_words(self):
    self.assertEqual(
        ptb.tagged_words('WSJ/00/WSJ_0003.MRG')[:3],
        [('A', 'DT'), ('form', 'NN'), ('of', 'IN')],
    )
Example #4
# -*- coding: utf-8 -*-

import nltk
from nltk.corpus import ptb

tagged_corpus = ptb.tagged_words(categories=['news'])

#print(len(tagged_corpus))


def nonWord_strip(tag):
    # True when `tag` marks a real word token (not a trace, bracket,
    # symbol, or punctuation tag).
    return tag not in {'-NONE-', '-LRB-', '-RRB-', 'SYM', ':', '.', ',', '``', "''"}


print("*********       QUESTION 1     ***************")
# Tokens: every occurrence whose tag survives the non-word filter.
words_without_lst = [x[0] for x in tagged_corpus if nonWord_strip(x[1])]

# words_without_lst2 = [x[0].lower() for x in tagged_corpus if nonWord_strip(x[1])]

print("The number of word tokens after removing non-words is",
      len(words_without_lst))

# Types: the distinct words among those tokens.
words_without_set = {x[0] for x in tagged_corpus if nonWord_strip(x[1])}

print("The number of distinct word types after removing non-words is",
      len(words_without_set))

# Lexical diversity = distinct types / total tokens.
print("Lexical diversity is", len(words_without_set) / len(words_without_lst))
Example #6
def parse_file(f):
    # Tally the tagged words of one PTB file into the per-tag models.
    # common.OPEN_CLASSES, common.CLOSED_CLASSES, super_model, add_counts,
    # and observe_closed come from the surrounding project.
    for word, tag in ptb.tagged_words(f):
        if tag in common.OPEN_CLASSES:
            add_counts(word, super_model[tag])
        elif tag in common.CLOSED_CLASSES:
            observe_closed(word, super_model[tag])
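For a runnable picture of the same open/closed-class split, here is a self-contained sketch; the tag sets and the Counter-based model are hypothetical stand-ins for the project's `common` module, `super_model`, and its two helpers:

from collections import Counter, defaultdict
from nltk.corpus import ptb

# Hypothetical stand-ins: open classes keep admitting new members (content
# words); closed classes are small, fixed sets of grammatical words.
OPEN_CLASSES = {'NN', 'NNS', 'NNP', 'VB', 'VBD', 'VBZ', 'JJ', 'RB'}
CLOSED_CLASSES = {'DT', 'IN', 'PRP', 'CC', 'TO', 'MD'}

super_model = defaultdict(Counter)  # tag -> word frequency table

def parse_file(f):
    for word, tag in ptb.tagged_words(f):
        if tag in OPEN_CLASSES or tag in CLOSED_CLASSES:
            # Stand-in for add_counts / observe_closed: both branches
            # reduce to tallying the word under its tag here.
            super_model[tag][word] += 1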
Example #9
import nltk
from nltk.corpus import treebank, ptb, sinica_treebank, conll2007, words

# `docs` is defined earlier in the original script and is not shown here.
print(docs[0].doctype)
print(docs[0].date_time)
print(docs[0].headline)
print(docs[0].text)  # doctest: +ELLIPSIS
# parsed corpora
print(treebank.fileids())  # doctest: +ELLIPSIS
print(treebank.words('wsj_0003.mrg'))
print(treebank.tagged_words('wsj_0003.mrg'))
print(treebank.parsed_sents('wsj_0003.mrg')[0])  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# nltk.download('ptb')
print(ptb.fileids())  # doctest: +SKIP
# download the corpus from here: https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/treebank.zip
# then extract it to the following location: .../nltk_data/corpora/ptb/
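# A setup sketch under stated assumptions: nltk.data.path[0] is writable and
# the zip unpacks to a top-level treebank/ directory, which yields the
# 'treebank/combined/...' fileids used below.
import os, zipfile, urllib.request
_ptb_dir = os.path.join(nltk.data.path[0], 'corpora', 'ptb')
if not os.path.isdir(_ptb_dir):
    _zip_path, _ = urllib.request.urlretrieve(
        'https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/treebank.zip'
    )
    with zipfile.ZipFile(_zip_path) as zf:
        zf.extractall(_ptb_dir)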
print(ptb.words('treebank/combined/wsj_0003.mrg'))  # doctest: +SKIP
print(ptb.tagged_words('treebank/combined/wsj_0003.mrg'))  # doctest: +SKIP
# print(ptb.categories())  # doctest: +SKIP
# print(ptb.fileids('news'))  # doctest: +SKIP
# print(ptb.words(categories=['humor', 'fiction']))  # doctest: +SKIP
# nltk.download('sinica_treebank')
print(sinica_treebank.sents())  # doctest: +SKIP
print(sinica_treebank.parsed_sents()[25])  # doctest: +SKIP
# nltk.download('conll2007')
print(conll2007.sents('esp.train')[0])  # doctest: +SKIP
print(conll2007.parsed_sents('esp.train')[0])  # doctest: +SKIP
print(conll2007.parsed_sents('esp.train')[0].tree())  # doctest: +SKIP
# for tree in ycoe.parsed_sents('cocuraC')[:4]:
#     print(tree)  # doctest: +SKIP
# word lists and lexicons
print(words.fileids())
print(words.words('en'))  # doctest: +ELLIPSIS