Example #1

from nltk import FreqDist
from stop_words import get_stop_words


def filter_words(words):
    # Build a frequency distribution, then drop Arabic stop words and
    # very short tokens (two characters or fewer).
    new_words = FreqDist(words)
    stopwords = set(get_stop_words('ar'))

    # Iterate over a snapshot of the keys: popping entries while
    # iterating the live keys() view raises a RuntimeError in Python 3,
    # and the original back-to-back ifs could pop the same word twice.
    for word in list(new_words.keys()):
        if word in stopwords or len(word) <= 2:
            new_words.pop(word)

    return new_words
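
A quick usage sketch (the sample tokens are illustrative, not from the original project):

tokens = ["كتاب", "جميل", "في", "لا", "ok"]
filtered = filter_words(tokens)
# Two-character tokens such as "ok", "في" and "لا" are dropped by the
# length check; which longer tokens survive depends on the stop-words
# package's Arabic list.
print(filtered.most_common())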
Example #2
from nltk import FreqDist
from nltk.corpus import movie_reviews


def worst_errors_many_wrong_decisions(self, k, feature_extractor):
    # Collect the feature names of every misclassified document and
    # return the k most frequent ones.
    worst_errors = []
    features = []
    wrongDocs = self.error_prediction_docs(self.maintest, self.testClassify)
    for doc in wrongDocs:
        feature_dic = feature_extractor(movie_reviews.words(fileids=[doc]))
        # keys() returns a view in Python 3; convert it before concatenating
        features = features + list(feature_dic.keys())
    fd = FreqDist(feature.lower() for feature in features)
    for _ in range(k):
        x = fd.max()            # most frequent remaining feature
        fd.pop(x)               # remove it so max() yields the next one
        worst_errors.append(x)
    return worst_errors
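
The max()/pop() loop re-implements top-k extraction by hand; FreqDist.most_common gives the same result without mutating the distribution. A minimal standalone sketch with made-up data:

from nltk import FreqDist

fd = FreqDist(["bad", "bad", "plot", "bad", "plot", "dull"])
print([feature for feature, count in fd.most_common(2)])
# ['bad', 'plot'] -- the same top-2 result, with fd left intact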
Example #3

from nltk import FreqDist
from nltk.corpus import stopwords


def word_count(text, exclude_inputlist):
    # Count alphabetic tokens, then drop English stop words,
    # caller-supplied exclusions, and words occurring fewer than twice.
    frequency = FreqDist(wd.lower() for wd in text if wd.isalpha())
    excludelist = set(stopwords.words('english') + exclude_inputlist)
    # Snapshot the keys: popping while iterating the live view raises
    # a RuntimeError in Python 3.
    for word in list(frequency.keys()):
        if word in excludelist or frequency[word] < 2:
            frequency.pop(word)
    return frequency
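
A usage sketch for word_count (the sample tokens are illustrative):

tokens = "The cat sat . The cat ran . A dog barked".split()
print(word_count(tokens, ["dog"]).most_common())
# [('cat', 2)] -- stop words, 'dog', and words seen only once are filtered out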
Example #4
from nltk import FreqDist

# filteredStopwords (a stop-word-filtered token list), text (the raw
# corpus string) and tokenizeWord are defined earlier in the original
# script and are not shown here.

# Removing numeric tokens from the list of words.
filteredStopwords = [i for i in filteredStopwords if not i.isdigit()]
freqDist = FreqDist(filteredStopwords)

print("In HHBD Hindi Bible")
print(f"प्रेम appears {freqDist['प्रेम']} times")
print(f"डर appears {freqDist['डर']} times")

checkWords = ["यीशु", "मसीह", "उद्धारकर्ता", "उद्धार", "क्रूस"]
checkWordFreq = {}
for checkWord in checkWords:
    checkWordFreq[checkWord] = freqDist[checkWord]

print(checkWordFreq)

# Drop "राजा" if present; the default of None avoids a KeyError.
freqDist.pop("राजा", None)

# Split the text into sentences on the Devanagari double and single
# danda punctuation marks.
sents = []
for i in text.split("॥"):
    sents.append(i.split("।"))
sents = [item for sublist in sents for item in sublist]

from collections import defaultdict

# Score each sentence by the summed frequencies of its tokens.
ranking = defaultdict(int)
for i, sent in enumerate(sents):
    for token in tokenizeWord(sent):
        if token in freqDist:
            ranking[i] += freqDist[token]

from heapq import nlargest
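
The original snippet cuts off right after importing nlargest. A plausible continuation, an assumption about intent rather than part of the source, would select the highest-scoring sentences as a summary:

# Hypothetical next step: indices of the three best-scoring sentences.
top_idx = nlargest(3, ranking, key=ranking.get)
summary = "।".join(sents[i] for i in sorted(top_idx))
print(summary)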
Example #5
import itertools


def ranked_freq_dist(fdist):
    # Rank the (word, count) pairs of a FreqDist so that tied counts
    # share a rank. Only the loop body survived in the original
    # snippet; the header and setup here are a reconstruction.
    f_list = fdist.most_common()
    f_list2 = f_list[1:]  # the same list shifted by one, to compare neighbours
    ranked_list = ()
    rank = 1

    for x, y in itertools.zip_longest(f_list, f_list2, fillvalue=(0, 0)):
        rank_tup = tuple(['Rank:' + str(rank)])
        new_element = tuple([rank_tup + x])

        if x[1] > y[1]:
            # Strictly more frequent than the next entry: advance the rank.
            ranked_list += new_element
            rank += 1
        elif x[1] == y[1]:
            # Tied with the next entry: the same rank is reused.
            ranked_list += new_element

    return ranked_list


# fdist is assumed to be a FreqDist built earlier in the original script.
print('Ranked frequency distribution in descending order of frequency',
      ranked_freq_dist(fdist))

# Removing stop words and punctuation from the word list.
filtered_words = [w for w in words if w not in stop_words and w.isalnum()]

# Creating a FreqDist object from the filtered list.
filtered_fdist = FreqDist(filtered_words)

# Plotting the top 10 words. I didn't plot the top 50 as the graph gets very hard to read on a small screen.
#filtered_fdist.plot(10, title='Frequency Distribution')

# Reporting (and removing) the counts of the words 'America' and 'world'.
# FreqDist.pop() returns the count and deletes the entry; passing a
# default of 0 avoids a KeyError if a word never occurred.
print("Occurrences of 'America':", filtered_fdist.pop('America', 0),
      "\nOccurrences of 'world':", filtered_fdist.pop('world', 0))
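
If the counts only need to be inspected, indexing is the non-destructive alternative to the pop() calls above; as a Counter subclass, FreqDist returns 0 for unseen words instead of raising:

print("Occurrences of 'America':", filtered_fdist['America'])
print("Occurrences of 'world':", filtered_fdist['world'])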