예제 #1
0
                                    '@Tags': {
                                        '$each': tags
                                    }
                                }})


def tags_list_to_dict(user):
    """Replace a user's '@Tags' list with a tag -> occurrence-count mapping.

    Counts how many times each entry occurs in ``user['@Tags']`` and writes
    the resulting dict back through ``upd_handler.db.users.update``, selecting
    the document by its ``'@Id'`` value.

    Args:
        user: Mapping describing a user document. When it lacks the ``'@Id'``
            key or the ``'@Tags'`` key, the function returns without writing.

    Returns:
        None.
    """
    if not ('@Id' in user and '@Tags' in user):
        return

    frequencies = {}
    for name in user['@Tags']:
        frequencies[name] = frequencies.get(name, 0) + 1

    # The stored '@Tags' value is overwritten with the frequency mapping.
    # NOTE(review): every tag string becomes a key of the stored sub-document;
    # confirm the database accepts all tag values as field names.
    selector = {'@Id': user['@Id']}
    change = {'$set': {'@Tags': frequencies}}
    upd_handler.db.users.update(selector, change)


# Iterate over all posts; append each post's tags to the owner user's profile
# for post in tqdm(db.get_post()):
#     if '@OwnerUserId' in post and '@Tags' in post:
#         append_tags(post['@OwnerUserId'], post['@Tags'])

# Iterate over all users returned by the '@Tags.1' $exists query and convert
# each one's list of tags to a frequency distribution. The query result is
# materialized with list() before the loop starts.
# NOTE(review): presumably '@Tags.1' addresses the second element of the tag
# list, in which case users with a single tag are never converted — confirm
# this is intended.
for user in tqdm(list(db.get_user({'@Tags.1': {'$exists': True}}))):
    tags_list_to_dict(user)
예제 #2
0
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from dbm import DBM

dbm = DBM()

# Gather the tags of every user document that has a 'tags' field.
all_users = dbm.get_user(flt={'tags': {'$exists': 1}})
tag_chunks = []  # one "<name> " piece per tag entry, joined once below
norm_tags = {}   # tag name -> sum of its 'count' values over all users
for user in all_users:
    for tag in user['tags']:
        name = tag['name']
        tag_chunks.append(name + " ")
        norm_tags[name] = norm_tags.get(name, 0) + tag['count']

# Build the space-separated text in a single join rather than repeated `+=`,
# which re-copies the growing string on every iteration. The resulting value
# (including the trailing space after the last name) is unchanged.
all_tags = "".join(tag_chunks)

print("Number of distinct tags:", len(norm_tags))

# Generate a non-normalized tag cloud image: each tag name contributes once
# per entry in a user's 'tags' list, independent of its 'count'.
wordcloud = WordCloud(width=700,
                      height=500,
                      stopwords=['n'],
                      normalize_plurals=False,
                      max_words=1000).generate(all_tags)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.title("Non Normalized Tag-Cloud")

# Generate a Normalized tag cloud image