return math.log(len(bloblist) / (1 + n_containing(word, bloblist)))


def tfidf(word, blob, bloblist):
    return tf(word, blob) * idf(word, bloblist)
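
# For reference, a sketch of the tf and n_containing helpers that idf() and tfidf()
# above rely on, following the classic TextBlob TF-IDF recipe; they may already be
# defined earlier in the notebook (blobs are assumed to be TextBlob objects).
import math


def tf(word, blob):
    # term frequency: fraction of the blob's words equal to `word`
    return blob.words.count(word) / len(blob.words)


def n_containing(word, bloblist):
    # number of documents in which `word` appears at least once
    return sum(1 for blob in bloblist if word in blob.words)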


from textblob import TextBlob

# wrap each body in a TextBlob so that `blob.words` (used below) is available
bloblist = [TextBlob(text) for text in df.head(100)['body']]
for i, blob in enumerate(bloblist):
    print("Top words in document {}".format(i + 1))
    scores = {word: tfidf(word, blob, bloblist) for word in blob.words}
    sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    for word, score in sorted_words[:3]:
        print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))

# polarity_scores() belongs to VADER's SentimentIntensityAnalyzer, not to
# nltk.sentiment.SentimentAnalyzer; it requires the vader_lexicon resource (see below)
from nltk.sentiment import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()
for sentence in bloblist:
    print(sentence)
    ss = sid.polarity_scores(str(sentence))  # VADER expects a plain string
    for k in sorted(ss):
        print('{0}: {1}, '.format(k, ss[k]), end='')
    print()

from nltk.sentiment import SentimentAnalyzer  # feature-extraction class, distinct from VADER
from nltk.sentiment.util import mark_negation

sentim_analyzer = SentimentAnalyzer()
# all_words() expects tokenized documents; mark_negation() tags tokens that follow a negation
all_words_neg = sentim_analyzer.all_words([mark_negation(list(blob.words)) for blob in bloblist])
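# A hedged sketch of where all_words_neg typically goes next in NLTK's
# SentimentAnalyzer workflow (the min_freq threshold is an assumption, and a real
# training run would use labeled documents rather than this unlabeled sample):
from nltk.sentiment.util import extract_unigram_feats

# keep unigrams seen at least 4 times as candidate features
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
# build feature dicts for each tokenized, negation-marked document
feature_sets = sentim_analyzer.apply_features([mark_negation(list(blob.words)) for blob in bloblist])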

import nltk  # must be imported before calling nltk.pos_tag below

tokens = df['tokens'][2]
tokens  # inspect the pre-computed token list for the row at index 2
tagged = nltk.pos_tag(tokens)  # part-of-speech tag the tokens
'''
To use the VADER method for sentiment analysis with the nltk library, we first need
to download the appropriate resource:

nltk.download("vader_lexicon")

This is a one-time operation.
'''
#nltk.download("vader_lexicon")
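# Since the download is a one-time operation, a guarded check like this sketch avoids
# re-downloading on every run (the resource path below is an assumption about where
# NLTK stores the VADER lexicon):
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon")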
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# 1. Instantiate the VADER analyzer
eng_nltk_sa_class = SentimentIntensityAnalyzer()
text = "I love this app."
result = eng_nltk_sa_class.polarity_scores(text)
# 2. Evaluate the sentiment and inspect the result
print("*) Original phrase")
print(text)
print("*) NLTK SentimentAnalyzer - type")
print(type(eng_nltk_sa_class))
print("*) NLTK SentimentAnalyzer - result")
print(result)
print("*) NLTK SentimentAnalyzer - result type")
print(type(result))
print("*) NLTK SentimentAnalyzer - result size")
print(str(len(result)))
'''
{'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'compound': 0.6369}
neg: negative sentiment score
neu: neutral sentiment score