def _buildUserDiscMap():
    """Group Mendeley discipline lists by the Twitter user who tweeted a document.

    Returns a dict of the form::

        {
            user1: [
                [disc1_1, disc1_2, ...],  # disciplines of the document behind one tweet
                [disc2_1, disc2_2, ...],
            ],
            user2: [...],
        }
    """
    userDiscMap = {}
    for doc in SimpleDoc.getall():
        twitterUsers = [tweet.user for tweet in doc.tweets]
        disciplines = doc.mendeleyDisciplines
        # Documents without any discipline information contribute nothing.
        if disciplines is None or len(disciplines) == 0:
            continue
        for twitterUser in twitterUsers:
            userDiscMap.setdefault(twitterUser, []).append(disciplines)
    return userDiscMap


def _countPostsPerDiscipline(userDiscMap):
    """Count, for every user, the total posts and the posts per discipline.

    Returns a dict of the form::

        {
            user1: {
                "total_posts": n,
                "user_posts_in_desc": {"disc1": n_1, "disc2": n_2, ...},
            },
            user2: {...},
        }
    """
    userDiscCountMap = {}
    for user, discLists in userDiscMap.items():
        postsInDisc = {}
        for discList in discLists:
            # set(): a discipline listed twice for the same tweet still
            # counts as a single post in that discipline.
            for disc in set(discList):
                postsInDisc[disc] = postsInDisc.get(disc, 0) + 1
        userDiscCountMap[user] = {
            "total_posts": len(discLists),
            "user_posts_in_desc": postsInDisc,
        }
    return userDiscCountMap


def userCorrelationToDiscipline(minPosts=50):
    """Print, for every active user, the share of their posts per discipline.

    Two intermediate results are cached as JSON in the data directory and
    reused when the files already exist:

    * ``user_disc_map.json`` -- per user, one discipline list per tweet.
    * ``user_disc_count_map.json`` -- per user, the total number of posts
      and the number of posts per discipline.

    For each user with more than ``minPosts`` posts the user, followed by a
    list of ``[discipline, fraction_of_posts]`` pairs sorted by descending
    fraction, is printed.

    Args:
        minPosts: Only users with strictly more posts than this are printed.
            Defaults to 50.
    """
    # Step 1: user -> list of discipline lists (one per tweet).
    if os.path.isfile(dataPath("user_disc_map.json")):
        userDiscMap = readJsonFromData("user_disc_map.json")
    else:
        userDiscMap = _buildUserDiscMap()
        writeJsonToData(userDiscMap, "user_disc_map.json")

    # Step 2: user -> total posts and posts per discipline.
    if os.path.isfile(dataPath("user_disc_count_map.json")):
        userDiscCountMap = readJsonFromData("user_disc_count_map.json")
    else:
        userDiscCountMap = _countPostsPerDiscipline(userDiscMap)
        writeJsonToData(userDiscCountMap, "user_disc_count_map.json")

    # Step 3: report relative frequencies for sufficiently active users.
    for user, userdata in userDiscCountMap.items():
        totalPosts = userdata['total_posts']
        # Filter first so nothing is computed (or divided) for users that
        # would not be printed anyway.
        if totalPosts <= minPosts:
            continue

        relCounts = sorted(
            ([disc, float(count) / totalPosts]
             for disc, count in userdata['user_posts_in_desc'].items()),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Parenthesised single-argument form prints the same under the
        # Python 2 print statement.
        print(user)
        print(relCounts)
        print("\n\n")
# Number of tweets per Twitter user; cached as JSON in the data directory.
numTweetsPerUserFilename = "num_tweets_per_user.json"

if not path.isfile(dataPath(numTweetsPerUserFilename)):
    numTweetsPerUser = {}

    def getRelevantData(doc):
        """Add the tweets of one document to the per-user tally."""
        # presumably doc[2] holds the document's tweets and tweet[1] the
        # tweeting user -- TODO confirm against the simple-doc format
        for tweet in doc[2]:
            user = tweet[1]
            numTweetsPerUser[user] = numTweetsPerUser.get(user, 0) + 1

    doForEachSimpleDoc(getRelevantData)
    writeJsonToData(numTweetsPerUser, numTweetsPerUserFilename)
else:
    numTweetsPerUser = readJsonFromData(numTweetsPerUserFilename)

# NOTE(review): numpy.histogram does not count values outside the outermost
# bin edges, so users with more than 1000 tweets are left out -- confirm
# this is intended.
hist = numpy.histogram(
    list(numTweetsPerUser.values()),
    [1, 2, 3, 4, 5, 10, 20, 100, 500, 1000],
)

print("\n" * 3)

# Tweet Histogram
print("Tweet Histogram:")
formatHist(hist[0], hist[1], 6)

print("\n" * 3)

# Top X Tweeters
import json
from os import listdir
from os.path import isfile, join
from os.path import basename
from main.util.common import plosDataFiles, plosDataBaseDir, readAsJson, writeJsonToData, doForEachPlosDoc

# Collects the user of every Twitter event across all PLOS documents.
users = []


def getRelevantData(plosDoc):
    """Append the user of each Twitter event in ``plosDoc`` to ``users``."""
    for source in plosDoc['sources']:
        # Only the source named 'twitter' is of interest here.
        if source['name'] != 'twitter':
            continue
        users.extend(event['event']['user'] for event in source['events'])


# Visit every PLOS document, then store the collected users as JSON.
doForEachPlosDoc(getRelevantData)
writeJsonToData(users, "users.json")