Example #1
0
from nltk.book import text1, text3

# Explore NLTK's sample texts: concordance, similar words, shared contexts,
# then simple token/vocabulary counts.
# Fixed: Python 2 ``print`` statements converted to the print() function
# (the rest of this file uses Python 3 syntax), and ``text3`` is now
# imported at the top of the file before being used here.
text1.concordance(
    "monstrous")  # Show a concordance view of a word with its context

text1.similar("monstrous")  # Show words that appear in similar context

text1.common_contexts(["monstrous", "very"
                       ])  # Examine the context shared by two or more words

print(len(text1))  # Count number of words and punctuations

print(len(set(text1)))  # Print vocab size of the text

print(text3.count("smote"))  # Print num word occurence
Example #2
0
import nltk

from nltk import FreqDist
from nltk.book import text1

# Walk through NLTK Text methods on text1, then filter its vocabulary by
# word length and by frequency.
# Fixed: typos in the user-facing messages ("occurances" -> "occurrences",
# "meetine" -> "meeting"); renamed the vocabulary variable to follow PEP 8.
print("\nSearch for all occurrences of the parameter in the text with context:")
text1.concordance("want")

print(
    "\nSearch for other words that appear in similar contexts to the parameter"
)
text1.similar("monstrous")

text1.common_contexts(["test", "try"])

# Is blocking to the script continuing (dispersion_plot opens a plot window)
text1.dispersion_plot(["mean", "know"])

# No-op expressions kept from the tutorial (only meaningful in a REPL).
len(text1)
set(text1)
sorted(set(text1))
len(set(text1))

print("\nFind all words in a text meeting a predicate (word length)")
vocab = set(text1)
long_words = [w for w in vocab if len(w) > 15]
print(sorted(long_words))

print("\nFind words to categorise a text")
fdist1 = FreqDist(text1)
# Words longer than 9 characters that occur more than 8 times.
print(sorted(w for w in set(text1) if len(w) > 9 and fdist1[w] > 8))
#/Users/randou/Esther/Brandeis/2019 Fall/LING131A NLP/Exercises
# -*- coding: utf-8 -*-

import nltk
nltk.download()
from nltk.book import *
from nltk.book import text1
import pandas as pd

# =============================================================================
# 1.3 Searching Text
# =============================================================================

text1.concordance('monstrous')  # appearances of a word with context
text1.similar('monstrous')  # words used in similar contexts
text1.common_contexts(['monstrous', 'mystifying'])
text1.dispersion_plot(['love', 'peace', 'luck', 'fortune'])
text1.generate()

# =============================================================================
# 1.4 Counting Vocabulary
# =============================================================================

len(text1)
# Fixed: the original sorted the set before taking len(); sorting is wasted
# work here, since len(set(...)) is the same number.
len(set(text1))
len(set(text1)) / len(text1)  # lexical richness
text1.count('love')


def lexical_diversity(text):
    """Return the ratio of distinct tokens to total tokens in *text*.

    Args:
        text: a sized iterable of hashable tokens (an NLTK ``Text``,
            a list of words, or a string of characters).

    Returns:
        float: ``len(set(text)) / len(text)``, or ``0.0`` for an empty
        text (fixed: the original raised ZeroDivisionError).
    """
    if len(text) == 0:
        return 0.0
    return len(set(text)) / len(text)
from nltk.book import text1 as t1
from nltk.book import text4 as t4
# NLTK tour over text1/text4 (section headers printed in Chinese).
# Fixed: Python 2 print statements converted to print(); the undefined
# alias ``nk`` replaced with ``nltk`` (imported earlier in this file);
# filter()[:10] replaced with a sliceable list (filter() returns an
# iterator on Python 3); stopped shadowing the builtin ``tuple``.
print('=================================')
# Download the test data (run once, then leave commented out).
# nltk.download()

print('===============查找关键词==================')
t1.concordance("america")

print('===============查找相似上下文===============')
t1.similar("america")

print('=============共同的语法结构=================')
t1.common_contexts(['in', 'of'])

print('=================词汇分布图=================')
# NOTE(review): 'democaracy' looks like a typo for 'democracy' -- confirm
# intent before changing, since it is a query argument.
t4.dispersion_plot(['citizens', 'democaracy', 'freedom', 'america'])

print('=================统计最常出现的词================')
freq_dist = nltk.FreqDist(t1)
freq_dist.plot(50, cumulative=False)

print('=================统计长度超过15的词===============')
vocab = set(t1)
long_words = [w for w in vocab if len(w) > 15][:10]
print(long_words)

print('=================常用双连词搭配===============')
# Materialise the generator so the bigrams are actually shown (the
# original computed them but never printed anything under this header).
bigram_pairs = list(nltk.bigrams(['all', 'in', 'of', 'take', 'like']))
print(bigram_pairs)
# Emit a fixed greeting on stdout.
greeting = "hello world"
print(greeting)
import nltk
nltk.download('book')
from nltk.book import text1
# Searching NLTK's sample texts: concordances, distributionally similar
# words, shared contexts, a dispersion plot, and generated text.
text1 = nltk.book.text1
text2 = nltk.book.text2

text1.concordance('monstrous')  # each occurrence shown with its context
text1.similar('monstrous')      # words appearing in similar contexts
text2.similar('monstrous')
text2.common_contexts(["monstrous", "very"])
text1.common_contexts(["monstrous", "whale"])

# Plot where each of these words occurs through text4.
nltk.book.text4.dispersion_plot(
    ["citizens", "democracy", "freedom", "duties", "America"])

nltk.book.text3.generate()

# Counting Vocabulary: token totals vs. distinct-token totals.
len(text1)
len(text2)

# Tokens are individual units of text; the vocabulary is the distinct set.
text3_sample = nltk.book.text3
len(text3_sample)       # token count
len(set(text3_sample))  # vocabulary size

# Lexical richness: share of the text made up of distinct tokens.
len(set(text2)) / len(text2)


# Function
def lexical_diversity(text):
    """Ratio of distinct tokens to the total number of tokens in ``text``."""
    distinct_tokens = set(text)
    return len(distinct_tokens) / len(text)