'@Tags': { '$each': tags } }}) def tags_list_to_dict(user): if '@Id' not in user or '@Tags' not in user: return tag_dict = {} for tag in user['@Tags']: if tag not in tag_dict: tag_dict[tag] = 1 else: tag_dict[tag] += 1 upd_handler.db.users.update({'@Id': user['@Id']}, {'$set': { '@Tags': tag_dict }}) # Iterate overall posts; append the post-tags to the owner user's prof # for post in tqdm(db.get_post()): # if '@OwnerUserId' in post and '@Tags' in post: # append_tags(post['@OwnerUserId'], post['@Tags']) # Iterate overall users; convert their list of tags to a freq-dist for user in tqdm(list(db.get_user({'@Tags.1': {'$exists': True}}))): tags_list_to_dict(user)
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# NOTE(review): presumably a project-local module; it shares its name with
# the standard-library `dbm` package -- confirm the import path resolves to
# the intended one.
from dbm import DBM

dbm = DBM()

# Get all tags from all users
all_users = dbm.get_user(flt={'tags': {'$exists': 1}})

tag_names = []  # one entry per (user, tag) pair, in iteration order
norm_tags = {}  # tag name -> sum of the 'count' values over all users
for user in all_users:
    for tag in user['tags']:
        name = tag['name']
        tag_names.append(name)
        norm_tags[name] = norm_tags.get(name, 0) + tag['count']

# Build the text in a single pass instead of repeated `+=` in the loop.
# Every name is followed by one space (including the last one).
all_tags = "".join(name + " " for name in tag_names)

print("Number of distinct tags:", len(norm_tags))

# Generate a non-normalized tag cloud image
wordcloud = WordCloud(
    width=700,
    height=500,
    stopwords=['n'],
    normalize_plurals=False,
    max_words=1000,
).generate(all_tags)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.title("Non Normalized Tag-Cloud")

# Generate a Normalized tag cloud image