def crossrefVsTwitter(yearBounds = [None, None], minTweetAge = None, maxTweetAge = None):
    """Scatter-plot Crossref counts against summed tweet-user weights.

    Iterates over ``SimpleDoc.getall()``, keeps the documents whose
    ``publicationDatetime()`` falls in June, July or August 2012, and for each
    of them selects the tweets whose age
    (``tweet.timestamp - doc.publicationTimestamp``) lies inside the requested
    window. Each document contributes one point: ``doc.numCrossrefs()`` on the
    x axis and the sum of the non-None ``user.weight()`` values of its
    selected tweets on the y axis.

    Prints the number of documents, the number of selected tweets, the share
    of selected tweets without a user weight and the value returned by
    ``korrelationskoeffizient(x, y)``, then shows the scatter plot together
    with a first-degree ``numpy.polyfit`` trend line.

    Args:
        yearBounds: Currently unused; the publication window is hard-coded to
            June-August 2012. Kept so existing callers keep working.
        minTweetAge: Smallest accepted tweet age, in the unit of the
            timestamps (the built-in maximum of 60*60*24*100 suggests
            seconds). ``None`` (the default) applies no lower bound, which
            matches the previous behaviour where the value was reset to 0 and
            then skipped by a truthiness test.
        maxTweetAge: Largest accepted tweet age in the same unit. ``None``
            (the default) means 100 days, the value that was previously
            hard-coded.

    Returns:
        None. If no document matches, the counters are printed and the
        function returns without plotting.
    """
    # Previously both bounds were overwritten unconditionally, so arguments
    # passed by the caller were silently ignored.
    if maxTweetAge is None:
        maxTweetAge = 60*60*24*100

    def inAgeWindow(age):
        # Explicit None checks so that a bound of 0 is honoured instead of
        # being treated as "no bound".
        if minTweetAge is not None and age < minTweetAge:
            return False
        return age <= maxTweetAge

    def userWeight(tweet):
        user = tweet.user()
        return None if user is None else user.weight()

    tweetVsCrossrefList = []
    totalDocs = 0
    totalTweets = 0
    nullWeights = 0
    nonNullWeights = 0

    for doc in SimpleDoc.getall():
        published = doc.publicationDatetime()
        if published.year != 2012 or not (6 <= published.month <= 8):
            continue

        docsTweets = [
            tweet for tweet in doc.tweets
            if inAgeWindow(tweet.timestamp - doc.publicationTimestamp)
        ]

        # Real lists (not lazy map/filter objects) so they can be measured
        # and traversed more than once on any Python version.
        userWeights = [userWeight(tweet) for tweet in docsTweets]
        knownWeights = [weight for weight in userWeights if weight is not None]

        nullWeights += len(userWeights) - len(knownWeights)
        nonNullWeights += len(knownWeights)

        # sum([]) is 0, so documents without weighted tweets get y == 0.
        tweetVsCrossrefList.append([doc.numCrossrefs(), sum(knownWeights)])
        totalDocs += 1
        totalTweets += len(docsTweets)

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print(totalDocs)
    print(totalTweets)
    weightedTweets = nullWeights + nonNullWeights
    # Guard against ZeroDivisionError when no tweet was selected at all.
    print(float(nullWeights) / weightedTweets if weightedTweets else 0.0)

    if not tweetVsCrossrefList:
        # zip(*[]) cannot be unpacked into x and y; nothing to plot.
        return

    x, y = zip(*tweetVsCrossrefList)
    paperFigure(plt)
    plt.scatter(x, y)
    # NOTE: the y values are summed user weights, although the axis label
    # reads "#Tweets".
    plt.ylabel("#Tweets")
    plt.xlabel("#Crossrefs")

    # Linear trend line across the observed x range (requires integer x).
    p = numpy.polyfit(x, y, 1)
    xTrend = list(range(min(x), max(x)+1))
    yTrend = numpy.polyval(p, xTrend)
    plt.plot(xTrend, yTrend, color='r')

    print('korrelationskoeffizient: ' + str(korrelationskoeffizient(x, y)))
    plt.tight_layout()
    plt.show()
from main.util.common import Sentiment
from main.util.plotting import paperFigure
import matplotlib.pyplot as plt

# Histogram of tweet age relative to the publication of the referenced
# document, with one data series per sentiment class.
sentiments = Sentiment.fromFile("sentiments_all.json")

# x collects one list of ages per entry of `classes`, in the same order, so
# the i-th histogram label below belongs to classes[i].
x = []
classes = ["1", "2", "3", "4"]
for classif in classes:
    agesInDays = []
    for sentiment in sentiments:
        if sentiment.classification == classif:
            doc = sentiment.doc()

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(int(sentiment.id_tweetTimestamp))
            # Difference of the two timestamps divided by 60*60*24, i.e.
            # converted to days assuming the timestamps are in seconds --
            # TODO confirm. With integer operands Python 2 floors this.
            agesInDays.append((int(sentiment.id_tweetTimestamp)-doc.publicationTimestamp)/(60*60*24))

    x.append(agesInDays)

paperFigure(plt)
# NOTE(review): `normed` was deprecated in Matplotlib 2.1 and removed in 3.1
# in favour of `density`; kept as-is because the installed version is unknown.
plt.hist(x, label=["negative", "neutral", "positive", "t+l"], bins = [ -10, 0, 5, 10, 30, 100, 300 ], normed=True)
plt.legend()
plt.show()