print stat[0] for statistic in statistics: print statistic[0] + ": " + str(statistic[1](valuesForStat)) print "\n\n\n" for ind, stat in zip(range(0, len(stats)), stats): valuesForStat = filter(lambda x: x!=None, map(lambda x: x[ind], positiveMatrix)) print stat[0] for statistic in statistics: print statistic[0] + ": " + str(statistic[1](valuesForStat)) """ userListTopics = json.load(open("data/userListTopics.json")) userTopics = { } topicCounts = { } for item in userListTopics: if "topics" in item: topics = map(lambda topic: topic["topicName"], item["topics"]) userTopics[item["userName"]] = topics for topic in topics: topicCounts[topic] = topicCounts.get(topic, 0) + 1 fraquentLists = map(lambda x: x[0], filter(lambda x: x[1] > 50, topicCounts.items())) negativeUserTopics = filter(lambda x: x!= None, map(lambda username: userTopics.get(username, None), negativeUsernames)) positiveUserTopics = filter(lambda x: x!= None, map(lambda username: userTopics.get(username, None), positiveUsernames)) print "\n".join(map(lambda x: ("%s" % (x[0])), filter(lambda x: x[0] in fraquentLists and x[1]>1.0, sorted(oddsRatioForEachFactor(positiveUserTopics, negativeUserTopics), key=lambda x: x[1]))))
s = userLists.get(row[0], set()) listname = filter(lambda c: c in string.printable, row[1]).strip().lower() s.add(listname) userLists[row[0]] = s print len(userLists) userLists = dict(map(lambda kv: (kv[0], list(kv[1])), userLists.items())) print len(userLists) f2 = open("user_lists.json", "w") f2.write(json.dumps(userLists)) f2.close()""" f = open("user_tweet_concepts.json", "r") j = json.load(f) counts = { } for username, words in j.items(): if words != None: for word in words: counts[word] = counts.get(word, 0) + 1 positiveWords = filter(lambda x: x != None, map(lambda x: x[1], filter(lambda x: x[0] in positiveUsers, j.items()))) negativeWords = filter(lambda x: x != None, map(lambda x: x[1], filter(lambda x: x[0] in negativeUsers, j.items()))) ors = oddsRatioForEachFactor(positiveWords, negativeWords) ors = sorted(ors, key=lambda x: counts[x[0]], reverse=True)[0:3000] print len(ors) sortedOrs = map(lambda x: x[0], sorted(ors, key=lambda x: x[1])) print ", ".join(sortedOrs[0:20]) print ", ".join(reversed(sortedOrs[-20:]))