import json
import math

from scipy import stats  # assumption: stats.spearmanr below is scipy.stats.spearmanr

# metricNames, docs, userListScore, the logger l and the other helpers used
# below are assumed to be defined elsewhere in the surrounding scripts.

# Collect (category name, expert correlation, diff) tuples per metric from the
# "exclude" entries of the list-corrs log.
outcomes = {}
f = open("list-corrs", "r")
for line in f:
    logEntry = json.loads(line)
    if logEntry['type'] == "exclude":
        for metricName in metricNames:
            o = outcomes.get(metricName, [])
            o.append((logEntry['cat-name'], logEntry['expert-corrs'][metricName], logEntry['diffs'][metricName]))
            outcomes[metricName] = o
f.close()

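# For each metric, report the excluded categories with the largest and
# smallest diffs.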
for metricName in metricNames:
    outcomesForMetric = outcomes[metricName]
    # Keep only entries whose expert correlation is not NaN, sorted ascending by diff.
    s = sorted(filter(lambda o: not math.isnan(o[1]), outcomesForMetric), key=lambda o: o[2])

    # One summary line per metric: the three entries just below the maximum diff
    # (s[-1] itself is excluded, as in the commented-out report below), then
    # "...", then the three smallest, all in descending order of diff.
    x = metricName + "\t"
    x += ",\t".join("%s:\t%1.3f" % (name, diff) for name, _, diff in reversed(s[-4:-1]))
    x += ",\t...,\t"
    x += ",\t".join("%s:\t%1.3f" % (name, diff) for name, _, diff in reversed(s[0:3]))
    l.log(x)

    """l.log("best settings for " + metricName + ":")
    l.log("\n".join(map(lambda x: "%-50s, %1.3f, %1.3f" % x, reversed(s[-6:-1]))))
    l.log("")
    l.log("worst settings for " + metricName + ":")
    l.log("\n".join(map(lambda x: "%-50s, %1.3f, %1.3f" % x, s[0:5])))
    l.log("\n")"""

l.close()

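# NOTE: the head of this per-metric loop is missing from this fragment; it
# presumably builds per-document (feature, metric score) pairs and unpacks
# them into x and y, like the baseline loops near the end of the file.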
    s, p = stats.spearmanr(x, y)

    baseline[metricName] = s

count = 0
count2 = 0
for ind, metricName in enumerate(metricNames):
    pairs = []
    for doc in docs:
        # Sum the list-based scores of this document's tweet authors;
        # count2/count track how many authors were / were not in userListScore.
        tweetScore = 0
        for usr in doc[0]:
            if usr in userListScore:
                tweetScore += userListScore[usr]
                count2 += 1
            else:
                count += 1

        # numExpertTweets = sum((userListScore.get(usr, 0) for usr in doc[0]))
        
        metricScore = doc[1][ind]
        pairs.append([tweetScore, metricScore])

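    # Spearman correlation between the per-document expert-list score and the
    # metric, printed with its difference from the baseline correlation.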
    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)

    l.log("%-20s: %1.3f %1.3f" % (metricName, s, (s-baseline[metricName])))

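# Average, over metrics, of how many tweet authors were missing from / present
# in userListScore (Python 2 integer division).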
print count / len(metricNames)
print count2 / len(metricNames)

l.close()
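# NOTE: the enclosing loop header (iterating over topCategory) is missing from
# this fragment; the body reports the overlap between getPatrickExperts(topCategory)
# and the users seen in the time window.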
    patrickExperts = getPatrickExperts(topCategory)
    print topCategory
    print len(patrickExperts.intersection(usersInTimewindow))
    print """""

#print len(wordExperts)
#print len(patrickExperts)
#print len(listWeights)
#print len(usersInTimewindow)

print ""
#experts = wordExperts.intersection(usersInTimewindow).intersection(bioExperts)
#experts = wordExperts.intersection(usersInTimewindow).intersection(patrickExperts).intersection(set(listWeights.keys()))
#experts = dict(map(lambda x: (x, listWeights[x]), experts))


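# Use wordExperts as the expert set; the alternative definitions above are
# left commented out.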
experts = wordExperts


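# Count the tweets authored by the chosen experts, then compare the per-metric
# correlations for this expert set against the baseline.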
totalNumExpertTweets = sum((1 for doc in docs for u in doc[0] if u in experts))
baseline = getBaseline(docs, metricNames, len(experts))

corrs = correlationWrtUsers(docs, experts, metricNames)

l.log("number of experts: " + str(len(experts)))
l.log("number of tweets: " + str(totalNumExpertTweets))
l.log("baseline: " + str(baseline))
l.log("diff: " + str(corrComparision(baseline, corrs)))

l.close()
# Users with between 0 and 4 tweets in the window; the head of this assignment
# is missing from this fragment and is assumed to be "lowTweetUsers = set(",
# since lowTweetUsers is used in the loop further below.
lowTweetUsers = set(
    map(lambda userKV: userKV[0], filter(lambda userKV: 0 <= userKV[1] <= 4, usersTweetFrequence.items()))
)


# Baseline: per metric, the Spearman correlation between the number of tweets
# in a document (len(doc[0])) and the document's metric score.
baseline = {}
for ind, metricName in enumerate(metricNames):
    pairs = []
    for doc in docs:
        numTweets = len(doc[0])
        metricScore = doc[1][ind]
        pairs.append([numTweets, metricScore])

    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)

    baseline[metricName] = s

# Per metric: correlation between the number of tweets from users who are
# neither in negativeUsernames nor in lowTweetUsers and the metric score,
# printed with its difference from the baseline.
for ind, metricName in enumerate(metricNames):
    pairs = []
    for doc in docs:
        numExpertTweets = sum(1 for usr in doc[0] if usr not in negativeUsernames and usr not in lowTweetUsers)
        metricScore = doc[1][ind]
        pairs.append([numExpertTweets, metricScore])

    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)

    print "%-20s: %1.3f     %+1.3f" % (metricName, s, s - baseline[metricName])

l.log("\n\n")