# Collect per-metric (category, expert-correlation, diff) triples from the
# "exclude" entries of the list-corrs log, then log the best/worst extremes
# of each metric ordered by diff.
outcomes = {}
with open("list-corrs", "r") as corr_file:  # with-block replaces a leaked handle
    for line in corr_file:
        logEntry = json.loads(line)
        if logEntry['type'] == "exclude":
            for metricName in metricNames:
                outcomes.setdefault(metricName, []).append(
                    (logEntry['cat-name'],
                     logEntry['expert-corrs'][metricName],
                     logEntry['diffs'][metricName]))

for metricName in metricNames:
    # Drop entries whose expert correlation is NaN, then sort ascending by diff.
    valid = [entry for entry in outcomes[metricName] if not math.isnan(entry[1])]
    s = sorted(valid, key=lambda entry: entry[2])
    # NOTE(review): s[-4:-1] skips the single best entry while s[0:3] includes
    # the single worst -- this asymmetry is preserved from the original; confirm
    # it is intentional.
    best = [(name, diff) for name, _, diff in reversed(s[-4:-1])]
    worst = [(name, diff) for name, _, diff in reversed(s[0:3])]
    x = metricName + "\t"
    x += ",\t".join("%s:\t%1.3f" % pair for pair in best)
    x += ",\t...,\t"
    x += ",\t".join("%s:\t%1.3f" % pair for pair in worst)
    l.log(x)
l.close()
# NOTE(review): this chunk begins mid-stream -- the first two statements look
# like the tail of a preceding per-metric loop (x, y, metricName, baseline are
# bound above this excerpt); left in place deliberately.
s, p = stats.spearmanr(x, y)
baseline[metricName] = s

count = 0   # tweets whose author has no entry in userListScore
count2 = 0  # tweets whose author has an entry in userListScore
for ind, metricName in enumerate(metricNames):
    pairs = []
    for doc in docs:
        # Accumulate the list scores of every user attached to this doc.
        tweetScore = 0
        for usr in doc[0]:
            if usr in userListScore:
                tweetScore += userListScore[usr]
                count2 += 1
            else:
                count += 1
        # numExpertTweets = sum((userListScore.get(usr, 0) for usr in doc[0]))
        metricScore = doc[1][ind]
        pairs.append([tweetScore, metricScore])
    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)
    l.log("%-20s: %1.3f %1.3f" % (metricName, s, (s - baseline[metricName])))

# Average miss/hit counts per metric. `//` preserves the original Python 2
# integer division; switch to `/` (true division) if a fractional average
# was actually intended.
print(count // len(metricNames))
print(count2 // len(metricNames))
l.close()
# Compare Patrick's curated expert list against users active in the time
# window, then log correlation stats for the chosen expert set (wordExperts).
patrickExperts = getPatrickExperts(topCategory)
print(topCategory)
print(len(patrickExperts.intersection(usersInTimewindow)))
# NOTE(review): the source contained a garbled `print """""` here --
# reconstructed as printing a blank separator line; confirm against history.
print("")
#print len(wordExperts)
#print len(patrickExperts)
#print len(listWeights)
#print len(usersInTimewindow)
print("")
# Alternative expert-set definitions kept from the original for reference:
#experts = wordExperts.intersection(usersInTimewindow).intersection(bioExperts)
#experts = wordExperts.intersection(usersInTimewindow).intersection(patrickExperts).intersection(set(listWeights.keys()))
#experts = dict(map(lambda x: (x, listWeights[x]), experts))
experts = wordExperts
# Total number of tweets authored by any expert across all docs.
totalNumExpertTweets = sum(1 for doc in docs for u in doc[0] if u in experts)
baseline = getBaseline(docs, metricNames, len(experts))
corrs = correlationWrtUsers(docs, experts, metricNames)
l.log("number of experts: " + str(len(experts)))
l.log("number of tweets: " + str(totalNumExpertTweets))
l.log("baseline: " + str(baseline))
l.log("diff: " + str(corrComparision(baseline, corrs)))
l.close()
# NOTE(review): the opening of this first statement was truncated in the
# paste; judging by the use of `lowTweetUsers` further down, it builds the
# set of users with at most 4 tweets. TODO: confirm against the original file.
lowTweetUsers = set(
    userKV[0] for userKV in usersTweetFrequence.items() if 0 <= userKV[1] <= 4
)

# Baseline: Spearman correlation of raw tweet count with each metric.
baseline = {}
for ind, metricName in enumerate(metricNames):
    pairs = []
    for doc in docs:
        numTweets = len(doc[0])
        metricScore = doc[1][ind]
        pairs.append([numTweets, metricScore])
    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)
    baseline[metricName] = s

# Same correlation after excluding negative and low-frequency users, with the
# delta against the baseline.
for ind, metricName in enumerate(metricNames):
    pairs = []
    for doc in docs:
        numExpertTweets = sum(
            1 for usr in doc[0]
            if usr not in negativeUsernames and usr not in lowTweetUsers)
        metricScore = doc[1][ind]
        pairs.append([numExpertTweets, metricScore])
    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)
    print("%-20s: %1.3f %+1.3f" % (metricName, s, s - baseline[metricName]))
l.log("\n\n")