if type(node) is nltk.tree.Tree:
        # Get the type of entity
        label = node.label()
        entity = node[0][0]
        named_entities[label].append(entity)

named_entities

"""# TF-IDF"""

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
import numpy as np
# Bag-of-words model over unigrams and bigrams. With float thresholds,
# min_df/max_df are document-frequency proportions: terms present in fewer
# than 0.5% or more than 50% of the documents are left out of the vocabulary.
cv = CountVectorizer(ngram_range=(1, 2), min_df=0.005, max_df=0.5)

# The vectorizer takes one string per document, so glue every token list
# back together with single spaces before fitting.
sentences = list(map(' '.join, clean_tokens_list))
cv.fit(sentences)

# Notebook display: number of n-grams retained in the learned vocabulary.
len(cv.vocabulary_)

# Document-term count matrix for the same sentences the vocabulary was fit on.
cv_counts = cv.transform(sentences)

# Notebook display: percentage of cells in the count matrix that are non-zero.
cv_counts.nnz * 100.0 / (cv_counts.shape[0] * cv_counts.shape[1])

# Re-weight the raw n-gram counts with TF-IDF, then rank every vocabulary term
# by its mean TF-IDF weight over all documents (highest first).
transformed_weights = TfidfTransformer().fit_transform(cv_counts)

# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2;
# use `get_feature_names_out()` when available and fall back only on older
# installations so the script runs on both.
if hasattr(cv, "get_feature_names_out"):
    tfidf_terms = cv.get_feature_names_out()
else:
    tfidf_terms = cv.get_feature_names()

# The column-wise mean may come back as a 2-D (1 x n_features) object;
# np.asarray(...).ravel() flattens it and tolist() yields plain Python floats.
mean_tfidf = np.asarray(transformed_weights.mean(axis=0)).ravel().tolist()

# term -> mean TF-IDF weight
features = dict(zip(tfidf_terms, mean_tfidf))

# List of (term, weight) pairs in descending weight order. sorted() is stable,
# so terms with equal weight keep their vocabulary order.
sorted_features = sorted(features.items(), key=lambda item: item[1], reverse=True)

# Notebook display: the ten highest-weighted terms.
sorted_features[:10]