from nltk.book import text1, text3

text1.concordance("monstrous")                # Show a concordance view of a word with its context
text1.similar("monstrous")                    # Show words that appear in similar contexts
text1.common_contexts(["monstrous", "very"])  # Examine the contexts shared by two or more words
print(len(text1))                             # Count the number of words and punctuation tokens
print(len(set(text1)))                        # Print the vocabulary size of the text
print(text3.count("smote"))                   # Print the number of occurrences of a word
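# Follow-up sketch (an addition, not in the original script): the two counts
# above combine into a simple lexical-richness score.
print(len(set(text1)) / len(text1))           # proportion of distinct words among all tokens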
from nltk import FreqDist
from nltk.book import text1

print("\nSearch for all occurrences of the parameter in the text, with context:")
text1.concordance("want")

print("\nSearch for other words that appear in similar contexts to the parameter:")
text1.similar("monstrous")

text1.common_contexts(["test", "try"])
text1.dispersion_plot(["mean", "know"])  # Opens a plot window and blocks the script until it is closed

# REPL-style expressions (no visible output when run as a script):
len(text1)
set(text1)
sorted(set(text1))
len(set(text1))

print("\nFind all words in a text meeting a predicate (word length):")
V = set(text1)
long_words = [w for w in V if len(w) > 15]
print(sorted(long_words))

print("\nFind words that help categorise a text:")
fdist1 = FreqDist(text1)
print(sorted(w for w in set(text1) if len(w) > 9 and fdist1[w] > 8))
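# Extra inspection step (added for illustration): FreqDist behaves like a
# collections.Counter, so the distribution built above can be queried directly.
print(fdist1.most_common(10))   # the ten most frequent tokens with their counts
print(fdist1['whale'])          # frequency of one specific word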
# /Users/randou/Esther/Brandeis/2019 Fall/LING131A NLP/Exercises
# -*- coding: utf-8 -*-
import nltk
# nltk.download()  # Opens the NLTK downloader GUI; only needed on the first run
from nltk.book import *
from nltk.book import text1

# =============================================================================
# 1.3 Searching Text
# =============================================================================
text1.concordance('monstrous')                       # occurrences of a word in context
text1.similar('monstrous')                           # words used in similar contexts
text1.common_contexts(['monstrous', 'mystifying'])
text1.dispersion_plot(['love', 'peace', 'luck', 'fortune'])
text1.generate()

# =============================================================================
# 1.4 Counting Vocabulary
# =============================================================================
len(text1)
len(sorted(set(text1)))
len(set(text1)) / len(text1)   # lexical richness
text1.count('love')

def lexical_diversity(text):
    return len(set(text)) / len(text)
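# Example use of the helper defined above (added for illustration; any text
# from nltk.book works as the argument):
print(lexical_diversity(text1))   # type/token ratio of Moby Dick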
import nltk as nk
from nltk.book import text1 as t1
from nltk.book import text4 as t4

print('=================================')
# Download the test data (only needed on the first run)
# nk.download()

print('===============Search for a keyword==================')
t1.concordance("america")

print('===============Find similar contexts===============')
t1.similar("america")

print('=============Shared grammatical contexts=================')
t1.common_contexts(['in', 'of'])

print('=================Lexical dispersion plot=================')
t4.dispersion_plot(['citizens', 'democracy', 'freedom', 'america'])

print('=================Most frequently occurring words================')
freList = nk.FreqDist(t1)
freList.plot(50, cumulative=False)

print('=================Words longer than 15 characters===============')
v = set(t1)
long_words = [w for w in v if len(w) > 15][:10]
print(long_words)

print('=================Common bigram collocations===============')
bigram_list = list(nk.bigrams(['all', 'in', 'of', 'take', 'like']))
print(bigram_list)
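# Added note (not in the original script): nk.bigrams() returns a lazy
# generator in Python 3, which is why it is wrapped in list() above. For
# collocations drawn from an actual corpus, Text objects offer a helper:
t4.collocations()   # prints frequent bigrams that occur more often than chance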
print("hello world") import nltk nltk.download('book') from nltk.book import text1 text1 = nltk.book.text1 text1.concordance('monstrous') # busca concordancias en el text1 text1.similar('monstrous') text2 = nltk.book.text2 text2.similar('monstrous') text2.common_contexts(["monstrous", "very"]) text1.common_contexts(["monstrous", "whale"]) nltk.book.text4.dispersion_plot( ["citizens", "democracy", "freedom", "duties", "America"]) nltk.book.text3.generate() # Counting Vocabulary len(text1) len(text2) # Tokens (individual unit of text) and Vocabulary (distinct unit) len(nltk.book.text3) # count tokens len(set(nltk.book.text3)) # vocabulary # lexical richnes of a text len(set(text2)) / len(text2) # Function def lexical_diversity(text): return len(set(text)) / len(text)