Python FreqDist.pop Examples

Programming Language: Python

Namespace/Package Name: nltk

Class/Type: FreqDist

Method/Function: pop

Examples at hotexamples.com: 6

Python FreqDist.pop - 6 examples found. These are the top-rated real-world Python examples of nltk.FreqDist.pop extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

keys(30)

N(30)

values(30)

update(30)

plot(30)

most_common(30)

FreqDist(30)

items(30)

freq(30)

inc(26)

hapaxes(25)

B(22)

get(22)

max(18)

iteritems(7)

pop(6)

copy(5)

tabulate(4)

samples(3)

__delitem__(2)

pformat(2)

sort_values(2)

has_key(1)

__init__(1)

transpose(1)

sort(1)

pprint(1)

reverse(1)

reset_index(1)

r_Nr(1)

_cumulative_frequencies(1)

clear(1)

elements(1)

insert(1)

viewkeys(1)

Example #1

Show file

def chapter2_exercise17():
    """Return the 50 most common non-stopword tokens of the Brown romance texts.

    A frequency distribution is built over
    ``brown.words(categories='romance')``, every token found in the English
    stopword list is popped from it, and ``most_common(50)`` of the remaining
    distribution is returned.
    """
    # Use a distinct local name: assigning to ``stopwords`` inside the function
    # would make that name local and raise UnboundLocalError on this very line.
    english_stopwords = set(stopwords.words("english"))
    top_50 = FreqDist(brown.words(categories='romance'))
    # Iterate over a snapshot of the keys so entries can be popped while looping.
    for word in list(top_50):
        if word in english_stopwords:
            top_50.pop(word)
    return top_50.most_common(50)

Example #2

Show file

File: nlp_utils.py Project: filipmarkoski/corona-virus-analysis

def generate_vocab(tokens: list, min_token_len: int = 2, threshold: int = 2, remove_numbers=True):
    """Count ``tokens`` and prune the resulting frequency distribution.

    Args:
        tokens: Tokens to count.
        min_token_len: Forwarded to ``preprocess_tokens`` as ``min_token_len``.
        threshold: Tokens returned by ``preprocess_tokens`` whose count is
            below this value are removed.
        remove_numbers: When true, ``remove_digit_tokens`` is called on the
            distribution before any other pruning.

    Returns:
        The pruned ``FreqDist``.
    """
    freq_dist = FreqDist(tokens)
    if remove_numbers:
        # NOTE(review): presumably prunes digit tokens in place (the return
        # value is unused) - confirm against remove_digit_tokens.
        remove_digit_tokens(freq_dist)
    kept_tokens = preprocess_tokens(tokens=list(freq_dist.keys()), min_token_len=min_token_len)
    # Drop every counted token that preprocessing did not hand back.
    for token in set(freq_dist.keys()).difference(kept_tokens):
        freq_dist.pop(token, None)
    # Drop rare tokens with a plain loop; a list comprehension used only for
    # its side effects would build a throwaway list of popped counts.
    for token in kept_tokens:
        if freq_dist[token] < threshold:
            freq_dist.pop(token, None)
    return freq_dist

Example #3

Show file

def chapter2_exercise18():
    """Return the 50 most common bigrams of content words in the Brown humor texts.

    Tokens that are not purely alphabetic or that appear in the English
    stopword list are popped from the word frequency distribution. A bigram is
    counted only when both of its members are still present in that
    distribution.
    """
    # Fetch the corpus words once; they are used for both passes below.
    humor_words = brown.words(categories='humor')
    freq_dist = FreqDist(humor_words)
    # A set gives constant-time membership tests inside the loop.
    stopword_set = set(stopwords.words("english"))
    # Iterate over a snapshot of the keys so entries can be popped while looping.
    for word in list(freq_dist):
        if not word.isalpha() or word in stopword_set:
            freq_dist.pop(word)
    bigrams_dist = FreqDist([
        (first, second)
        for first, second in nltk.bigrams(humor_words)
        if first in freq_dist and second in freq_dist
    ])
    return bigrams_dist.most_common(50)

Example #4

Show file

print("Collected data from " + str(count_submission) + " submissions and " +
      str(count_comment) + " comments.")

# Use nltk to convert the string into tokens (words and punctuation)
tokens = word_tokenize(raw_text)

# Use nltk to find the frequency distribution of the tokens
fdist = FreqDist(tokens)

# Remove tokens with only 1 or 2 characters. They are collected first so the
# distribution is not resized while it is being iterated.
short_words = [token for token in fdist if len(token) < 3]
for s in short_words:
    fdist.pop(s)

# Remove common but useless tokens
stop = set(stopwords.words('english'))  # get pre-defined stop words
additional_stop = ['https', 'http', 'n\'t', 'The', 'This', 'That',
                   '...']  # add additional stop words
stop.update(additional_stop)
for s in stop:
    # A stop word may never have occurred in the text. The default turns pop()
    # into a no-op for missing keys, so no bare ``except`` is needed to hide
    # the KeyError (a bare except would also swallow unrelated errors).
    fdist.pop(s, None)

# TODO: stem the tokens

Example #5

Show file

punctuations = [",", ".", '" "', ';', '-', ':', '."', '"', "'"]
# Keep only the words that are not stop words, not capitalized stop words,
# and not punctuation tokens.
for words in clintonwords:
    if (words not in englishstopwords
            and words not in englishcapitalize
            and words not in punctuations):
        frequentlyOccuring.append(words)

# Create the frequency distribution
frequencydist = FreqDist(frequentlyOccuring)
# Print the 50 most frequent words
print(frequencydist.most_common(50))
print("Plot the top 50 words")
# Create the plot
frequencydist.plot(50)
print(
    "Find out how many times the words world and america were used in the speech :"
)
# pop() removes the word from the distribution and returns its count; the
# default of 0 reports an absent word as zero instead of raising KeyError.
worldcount = frequencydist.pop("world", 0)
americacount = frequencydist.pop('America', 0)
print("Count of America :", americacount)
print("Count of world   :", worldcount)

Example #6

Show file

File: warmUp.py Project: osamaKhan99/Computer-Graphics-Assignment

# Fetch the speech once and reuse it for every statistic below instead of
# asking the corpus reader for the same file repeatedly.
allWords = inaugural.words('1993-Clinton.txt')

totalNumOfWords = len(allWords)
print("Total number of words in 1993-Clinton's Speech = ", totalNumOfWords)

print("Total Distinct Words = ", len(set(allWords)))

# Total number of characters across all tokens.
count = sum(len(word) for word in allWords)
print("Average Length of Words = ", round(count / totalNumOfWords))

########################################

print(allWords)

# Frequency distribution of the tokens that are entirely lower case.
lowerCase = [i for i in allWords if i.islower()]
freqDist = FreqDist(lowerCase)
print(freqDist)
print(freqDist.most_common(10))

# Frequency distribution of the tokens that are not English stop words.
stopWords = stopwords.words("english")
stopWordSet = set(stopWords)  # constant-time membership tests in the filter
notStopWords = [i for i in allWords if i not in stopWordSet]
freqDist02 = FreqDist(notStopWords)
print(freqDist02.most_common(10))

plotWords = FreqDist(allWords)
plotWords.plot(10)

# pop() removes the word and returns its count; the default of 0 reports an
# absent word as zero instead of raising KeyError.
print("World " + str(freqDist02.pop("world", 0)))
print("america " + str(freqDist02.pop("America", 0)))