Ejemplo n.º 1
0
def answer_four():
    """Return the frequent long tokens of ``moby_tokens`` in sorted order.

    Counts every token of the module-level ``moby_tokens`` sequence and keeps
    the distinct tokens that are longer than 5 characters and occur more than
    150 times.

    Returns:
        list: the matching tokens, sorted ascending.
    """
    # Import FreqDist from the module that defines it; importing it through
    # ``nltk.book`` also loads the book's sample texts as a side effect.
    from nltk.probability import FreqDist

    token_counts = FreqDist(moby_tokens)
    return sorted(
        token
        for token, count in token_counts.items()
        if len(token) > 5 and count > 150
    )
Ejemplo n.º 2
0
def answer_four():
    """List tokens longer than 5 characters seen more than 150 times.

    The tokens come from the module-level ``moby_tokens`` sequence; each
    distinct token is reported once.

    Returns:
        list: qualifying tokens in ascending sorted order.
    """
    # ``nltk.probability`` is where FreqDist lives; pulling it from
    # ``nltk.book`` would load the book's sample texts just to get the class.
    from nltk.probability import FreqDist

    counts = FreqDist(moby_tokens)
    frequent_long = [w for w in counts if len(w) > 5 and counts[w] > 150]
    frequent_long.sort()
    return frequent_long
Ejemplo n.º 3
0
def answer_five():
    """Return the longest distinct token of ``moby_tokens`` with its length.

    Returns:
        tuple: ``(word, length)``. When several tokens share the maximum
        length, the first one in the frequency distribution's iteration
        order is returned. When there are no tokens at all the result is
        ``("", 0)`` instead of an unbound-variable error.
    """
    # Import FreqDist from its defining module; going through ``nltk.book``
    # loads the book's sample texts as an import side effect.
    from nltk.probability import FreqDist

    token_counts = FreqDist(moby_tokens)
    # max() returns the first of several equally long items, which matches a
    # strict ``>`` scan; ``default`` covers the empty-input case.
    longest = max(token_counts, key=len, default="")
    return (longest, len(longest))
Ejemplo n.º 4
0
def answer_six():
    """Return ``(frequency, word)`` pairs for very frequent alphabetic tokens.

    A token of the module-level ``moby_tokens`` sequence qualifies when
    ``str.isalpha()`` is true for it and it occurs more than 2000 times.

    Returns:
        list: ``(frequency, word)`` tuples ordered from most to least
        frequent. Words with equal frequency keep the iteration order of the
        frequency distribution.
    """
    from operator import itemgetter

    # Import FreqDist from its defining module; going through ``nltk.book``
    # loads the book's sample texts as an import side effect.
    from nltk.probability import FreqDist

    token_counts = FreqDist(moby_tokens)
    frequent = [
        (count, word)
        for word, count in token_counts.items()
        if word.isalpha() and count > 2000
    ]
    # Sort on the count only (not the word) so that the stable sort leaves
    # equally frequent words in their original relative order.
    frequent.sort(key=itemgetter(0), reverse=True)
    return frequent
Ejemplo n.º 5
0
def answer_six():
    """Return ``(frequency, word)`` pairs for alphabetic tokens seen > 2000 times.

    Tokens are read from the module-level ``moby_tokens`` sequence.

    Returns:
        list: ``(frequency, word)`` tuples, highest frequency first. The list
        is an ascending stable sort read backwards, so words with equal
        frequency appear in the reverse of the frequency distribution's
        iteration order.
    """
    from operator import itemgetter

    # ``nltk.probability`` defines FreqDist; importing it via ``nltk.book``
    # would load the book's sample texts as a side effect.
    from nltk.probability import FreqDist

    counts = FreqDist(moby_tokens)
    frequent = {w: c for w, c in counts.items() if w.isalpha() and c > 2000}
    ascending = sorted(frequent.items(), key=itemgetter(1))
    # Walk the ascending result backwards and flip each pair to (count, word).
    return [(c, w) for w, c in reversed(ascending)]
Ejemplo n.º 6
0
# Distinct tokens longer than 5 characters that occur more than 150 times,
# in sorted order.
# NOTE(review): `freq` is read here but only assigned further down in this
# fragment; presumably it was already defined earlier in the original
# notebook -- confirm.
freq_150 = sorted([key for key in freq if len(key) > 5 and freq[key] > 150])
freq_150
#ANSWER 5
# Longest distinct token of text1 together with its length.

# De-duplicate through a set; the resulting order of `words` is whatever the
# set yields.
words = list(set(text1))
longest = ''
for word in words:
    # Strict '>' keeps the earlier entry of `words` when lengths are equal.
    if len(word) > len(longest):
        longest = word
(longest, len(longest))

#ANSWER6
# (count, word) pairs for alphabetic tokens occurring more than 2000 times.
freq = FreqDist(text1)
# Tuples compare by count first and then by word, so reverse=True puts the
# highest count first and breaks ties by word in descending order.
freq_2000 = sorted(
    [(freq[word], word)
     for word in freq.keys() if freq[word] > 2000 and word.isalpha()],
    reverse=True)
freq_2000

#ANSWER7
# Number of tokens in text1 divided by the number of sentences that
# nltk.sent_tokenize finds in moby_raw.

sents = nltk.sent_tokenize(moby_raw)
ratio = len(text1) / len(sents)
ratio

#ANSWER8
# Part-of-speech tag the tokens of text1 and start an empty dict.
# NOTE(review): tag_dict is presumably filled by the loop that follows, which
# is cut off in this fragment -- confirm.

tags = nltk.pos_tag(text1)
tag_dict = dict()

for tag in tags:
Ejemplo n.º 7
0
def question_six():
    """Return ``(count, word)`` pairs for alphabetic tokens of ``text1`` seen > 2000 times.

    Returns:
        list: ``(count, word)`` tuples sorted in descending tuple order, i.e.
        highest count first and, for equal counts, by word descending.
    """
    counts = FreqDist(text1)
    pairs = []
    for word, count in counts.items():
        if word.isalpha() and count > 2000:
            pairs.append((count, word))
    pairs.sort(reverse=True)
    return pairs
Ejemplo n.º 8
0
def question_four():
    """Return the sorted tokens of ``text1`` longer than 5 characters seen > 150 times.

    Returns:
        list: each qualifying distinct token once, in ascending sorted order.
    """
    counts = FreqDist(text1)
    selected = []
    for word in counts:
        if len(word) > 5 and counts[word] > 150:
            selected.append(word)
    selected.sort()
    return selected