Python preprocessの例

プログラミング言語: Python

名前空間/パッケージ名: rdt.nlp.pos

メソッド/関数: preprocess

hotexamples.comのコード掲載数: 4

Python preprocess - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのrdt.nlp.pos.preprocessの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: pos_iter.py プロジェクト: juchiyama/bigdata_fall2015

	def test_interactive(self):
		"""Print the preprocessed text of the first available document.

		Queries ``self.source`` for documents, runs them through
		``clean_html.doc_iter`` and prints the result of ``pos.preprocess``
		for the first document only. Nothing is printed when the iterator
		yields no documents.
		"""
		cursor = self.source.find()
		# NOTE(review): looks like a database cursor fetching 1000 docs per
		# round trip -- confirm against the type of self.source.
		cursor.batch_size(1000)
		for document in clean_html.doc_iter(cursor):
			print(pos.preprocess(document["cleansed_text"]))
			# Only the first document is inspected.
			break

コード例 #2

ファイルを表示

ファイル: chunk.py プロジェクト: juchiyama/bigdata_fall2015

def simple_np_bgram(documents):
	"""Yield one list of bigram-chunker parses per document.

	Each document is passed to ``pos.preprocess``; every sentence it
	produces is parsed with a ``BigramChunker`` built from the CoNLL-2000
	``train.txt`` chunked sentences. Because this is a generator, the
	chunker is only constructed when the first result is requested.

	documents -- iterable of documents accepted by ``pos.preprocess``
	             (presumably raw text strings; verify against callers).
	"""
	chunker = BigramChunker(conll2000.chunked_sents('train.txt'))
	for document in documents:
		yield [chunker.parse(sentence) for sentence in pos.preprocess(document)]

コード例 #3

ファイルを表示

ファイル: chunk.py プロジェクト: juchiyama/bigdata_fall2015

def simple_np_ugram(documents):
	"""Yield one list of unigram-chunker parses per document.

	Each document is passed to ``pos.preprocess``, which splits it into
	sentences (presumably sequences of (word, POS tag) pairs, as
	``UnigramChunker.parse`` expects -- verify against ``rdt.nlp.pos``).
	Every sentence is then parsed with a ``UnigramChunker`` built from the
	CoNLL-2000 ``train.txt`` chunked sentences.

	Because this is a generator, the chunker is only constructed when the
	first result is requested.

	documents -- iterable of documents accepted by ``pos.preprocess``.
	"""
	ugram = UnigramChunker(conll2000.chunked_sents('train.txt'))
	for doc in documents:
		yield [ugram.parse(sent) for sent in pos.preprocess(doc)]

コード例 #4

ファイルを表示

ファイル: ugram.py プロジェクト: juchiyama/bigdata_fall2015

import nltk, rdt.nlp.pos as pos
from nltk.corpus import conll2000

class UnigramChunker(nltk.ChunkParserI):
	"""Chunk parser that predicts a chunk tag from each POS tag alone.

	A ``nltk.UnigramTagger`` is trained on (POS tag, chunk tag) pairs, so
	at parse time the POS tags play the role of the "words" being tagged.
	"""

	def __init__(self, train_sents):
		"""Train the tagger on chunked sentence trees.

		train_sents -- iterable of chunk trees accepted by
		               ``nltk.chunk.tree2conlltags``.
		"""
		training_pairs = []
		for tree in train_sents:
			# Drop the word; keep only (POS tag, chunk tag).
			training_pairs.append(
				[(tag, chunk) for _word, tag, chunk in nltk.chunk.tree2conlltags(tree)]
			)
		self.tagger = nltk.UnigramTagger(training_pairs)

	def parse(self, sentence):
		"""Return the chunk tree for a sentence of (word, POS tag) pairs."""
		tags_only = [tag for (_word, tag) in sentence]
		# The tagger returns (POS tag, chunk tag) pairs; keep the chunk tags.
		chunk_labels = [label for (_tag, label) in self.tagger.tag(tags_only)]
		triples = [
			(word, tag, label)
			for (word, tag), label in zip(sentence, chunk_labels)
		]
		return nltk.chunk.conlltags2tree(triples)

if __name__ == "__main__":
	# Load the NP-only CoNLL-2000 chunk data, train on the training split
	# and print the evaluation against the test split.
	test_sents = conll2000.chunked_sents('test.txt', chunk_types=['NP'])
	train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
	unigram_chunker = UnigramChunker(train_sents)
	print(unigram_chunker.evaluate(test_sents))

	# Smoke test: chunk every sentence that preprocess produces for a
	# sample input and print the resulting parses.
	sample_parses = []
	for sentence in pos.preprocess("The dog went to the park."):
		sample_parses.append(unigram_chunker.parse(sentence))
	print(sample_parses)