コード例 #1
0
def _buildUserDiscMap():
    """Group the Mendeley discipline lists of all documents by tweeting user.

    Returns a dict with one discipline list per tweet of the user:
        { user1: [[disc1_1, disc1_2, ...], [disc2_1, ...]], user2: [...] }
    Documents without disciplines are skipped.
    """
    userDiscMap = {}
    for doc in SimpleDoc.getall():
        disciplines = doc.mendeleyDisciplines
        if disciplines is None or len(disciplines) == 0:
            continue
        for tweet in doc.tweets:
            userDiscMap.setdefault(tweet.user, []).append(disciplines)
    return userDiscMap


def _buildUserDiscCountMap(userDiscMap):
    """Count, per user, the total posts and the posts in each discipline.

    Returns a dict of the form:
        { user1: { "total_posts": n,
                   "user_posts_in_desc": { "disc1": n_1, "disc2": n_2, ... } },
          user2: { ... } }
    """
    userDiscCountMap = {}
    for user, discListList in userDiscMap.items():
        userPostsInDisc = {}
        for discList in discListList:
            # set(): a discipline listed twice for one post still counts once.
            for disc in set(discList):
                userPostsInDisc[disc] = userPostsInDisc.get(disc, 0) + 1
        userDiscCountMap[user] = {
            "total_posts": len(discListList),
            "user_posts_in_desc": userPostsInDisc,
        }
    return userDiscCountMap


def userCorrelationToDiscipline():
    """Print the discipline distribution of every user with more than 50 posts.

    Two intermediate results are cached as JSON files (located via dataPath)
    and are only recomputed when the respective file does not exist yet:

    1. "user_disc_map.json": user -> list of discipline lists, one per tweet.
    2. "user_disc_count_map.json": user -> total post count and the number of
       posts per discipline.

    For each user with more than 50 posts, the user is printed followed by a
    list of [discipline, share of the user's posts] pairs sorted by descending
    share.
    """
    if not os.path.isfile(dataPath("user_disc_map.json")):
        userDiscMap = _buildUserDiscMap()
        writeJsonToData(userDiscMap, "user_disc_map.json")
    else:
        userDiscMap = readJsonFromData("user_disc_map.json")

    if not os.path.isfile(dataPath("user_disc_count_map.json")):
        userDiscCountMap = _buildUserDiscCountMap(userDiscMap)
        writeJsonToData(userDiscCountMap, "user_disc_count_map.json")
    else:
        userDiscCountMap = readJsonFromData("user_disc_count_map.json")

    for user, userdata in userDiscCountMap.items():
        totalPosts = userdata['total_posts']
        # Only heavy users are reported; skip the rest before doing any work.
        if totalPosts <= 50:
            continue

        relCounts = sorted(
            ([disc, float(count) / totalPosts]
             for disc, count in userdata['user_posts_in_desc'].items()),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(user)
        print(relCounts)
        print("\n\n")
コード例 #2
0
numTweetsPerUserFilename = "num_tweets_per_user.json"
if not path.isfile(dataPath(numTweetsPerUserFilename)):
    # No cached result yet: count the tweets per user and store the counts.
    numTweetsPerUser = {}

    def getRelevantData(doc):
        """Add the tweets of one document to the per-user tweet counts."""
        # NOTE(review): assumes doc[2] is the document's tweet list and
        # tweet[1] the tweeting user -- confirm against doForEachSimpleDoc.
        for tweet in doc[2]:
            user = tweet[1]
            numTweetsPerUser[user] = numTweetsPerUser.get(user, 0) + 1

    doForEachSimpleDoc(getRelevantData)

    writeJsonToData(numTweetsPerUser, numTweetsPerUserFilename)
else:
    numTweetsPerUser = readJsonFromData(numTweetsPerUserFilename)

# The second argument lists the bin edges; per the numpy.histogram docs the
# last bin is closed, so counts above 1000 fall outside every bin.
hist = numpy.histogram(list(numTweetsPerUser.values()), [1, 2, 3, 4, 5, 10, 20, 100, 500, 1000])

# Single-argument print(...) behaves the same on Python 2 and 3.
print("\n" * 3)

# Tweet Histogram
print("Tweet Histogram:")
formatHist(hist[0], hist[1], 6)


print("\n" * 3)

# Top X Tweeters
コード例 #3
0
import json

from os import listdir
from os.path import isfile, join
from os.path import basename
from main.util.common import plosDataFiles, plosDataBaseDir, readAsJson, writeJsonToData, doForEachPlosDoc

# Accumulates the user of every twitter event seen by getRelevantData.
users = []


def getRelevantData(plosDoc):
    """Append the user of each twitter event in plosDoc to the module-level `users` list.

    Only entries of plosDoc['sources'] whose 'name' is 'twitter' are considered;
    for each of their 'events' the value at event['event']['user'] is recorded.
    Duplicates are kept.
    """
    twitterSources = (src for src in plosDoc['sources'] if src['name'] == 'twitter')
    for twitterSource in twitterSources:
        # The list is mutated in place, so no rebinding of the global is needed.
        users.extend(entry['event']['user'] for entry in twitterSource['events'])


# Fill `users` by handing getRelevantData to doForEachPlosDoc
# (presumably invoked once per PLOS document -- verify in main.util.common).
doForEachPlosDoc(getRelevantData)

# Write the collected user list out under the name "users.json".
writeJsonToData(users, "users.json")