def removeRetweetsRelative():
    """Show a pie chart of the relative tweet loss per document without retweets.

    Every document with at least one tweet contributes the fraction
    (all tweets - non-retweets) / all tweets. These fractions are handed to
    ``pieData`` together with the percentage bins below, and the bins and
    labels it returns are drawn with matplotlib.
    """
    countsPerDoc = []

    for doc in SimpleDoc.getall():
        if len(doc.tweets) < 1:
            # Documents without tweets would divide by zero further down.
            continue

        originals = len([tweet for tweet in doc.tweets if not tweet.isRetweet()])
        retweets = len([tweet for tweet in doc.tweets if tweet.isRetweet()])
        countsPerDoc.append([originals + retweets, originals])

    # Share of each document's tweets that disappears together with the retweets.
    diffs = [float(total - kept) / total for total, kept in countsPerDoc]

    binConditions = [
        [lambda x: x == 0.0, "0%"],
        [lambda x: 0.0 < x <= 0.3, "0%<d<=30%"],
        [lambda x: 0.3 < x <= 0.5, "30%<d<=50%"],
        [lambda x: x > 0.5, ">50%"],
    ]
    relBins, labels = pieData(binConditions, diffs)

    plt.figure()
    plt.pie(relBins, labels=labels, startangle=90, autopct='%1.1f%%')

    plt.title("Differenzen in Prozent, Retweets entfernt werden")
    plt.show()
def removeRetweetsAbsolute():
    """Show a pie chart of how many tweets each document loses without retweets.

    For every document with at least one tweet, the difference between the
    total number of tweets and the number of non-retweets is computed (i.e.
    the number of retweets). The differences are handed to ``pieData``
    together with the bins below, and the result is drawn with matplotlib.

    Only differences of 6 and more are binned here; an alternative binning
    for the small differences is kept as a comment below.
    """
    diffs = []

    for doc in SimpleDoc.getall():
        if len(doc.tweets) >= 1:
            numNormalTweets = sum(1 for tweet in doc.tweets if not tweet.isRetweet())
            numRetweets = sum(1 for tweet in doc.tweets if tweet.isRetweet())

            # Same definition as in removeRetweetsRelative: tweets before
            # the removal minus tweets that remain afterwards.
            numTotalTweets = numNormalTweets + numRetweets
            diffs.append(numTotalTweets - numNormalTweets)

    # Alternative binning covering the small differences:
    # relBins, labels = pieData([
    #     [lambda x: x==0, "diff: 0"],
    #     [lambda x: x==1, "diff: 1"],
    #     [lambda x: x>=2 and x <= 5, "diff: 2-5"],
    #     [lambda x: x>5, "diff: >5"]
    # ], diffs)

    relBins, labels = pieData([
        [lambda x: x >= 6 and x <= 10, "diff: 6-10"],
        [lambda x: x >= 11 and x <= 50, "diff: 11-50"],
        # Starts right after 50 so that a difference of exactly 51 is not
        # dropped between the last two bins.
        [lambda x: x > 50, "diff: >50"]
    ], diffs)

    plt.figure()
    plt.pie(relBins, autopct='%1.1f%%',
        startangle=90, labels=labels)

    plt.title("Differenzen in Anzahl Tweets zu Dokument, wenn Retweets entfernt werden.")
    plt.show()
def removeDoubleUsersAbsolute():
    """Show a pie chart of how many tweets each document loses per duplicate user.

    For every document with at least one tweet, the number of tweets minus
    the number of distinct ``tweet.user`` values is computed. The differences
    are handed to ``pieData`` together with the bins below, and the result is
    drawn with matplotlib.

    Only differences from 6 to 300 are binned here; an alternative binning
    for the small differences is kept as a comment below.
    """
    diffs = []

    for doc in SimpleDoc.getall():
        # A real list (not a lazy map object) so that len() works on it.
        tweetUsers = [tweet.user for tweet in doc.tweets]
        if len(tweetUsers) >= 1:
            # set() collapses repeated users, so the difference is the
            # number of tweets written by an already counted user.
            diffs.append(len(tweetUsers) - len(set(tweetUsers)))

    # Alternative binning covering the small differences:
    # relBins, labels = pieData([
    #     [lambda x: x==0, "diff: 0"],
    #     [lambda x: x==1, "diff: 1"],
    #     [lambda x: x>=2 and x <= 5, "diff: 2-5"],
    #     [lambda x: x>5, "diff: >5"]
    # ], diffs)

    relBins, labels = pieData([
        # Label now matches the predicate (the bin starts at 6, not 5).
        [lambda x: x >= 6 and x <= 10, "diff: 6-10"],
        [lambda x: x >= 11 and x <= 50, "diff: 11-50"],
        [lambda x: x >= 51 and x <= 300, "diff: 51-300"]
    ], diffs)

    plt.figure()
    plt.pie(relBins, autopct='%1.1f%%',
        startangle=90, labels=labels)

    plt.title("Differenzen in Anzahl Tweets zu Dokument, wenn doppelte Benutzer entfernt werden.")
    plt.show()
def alteringTweetStreamAfterFirstPeak():
    """Show a pie chart of total tweets relative to the one-week tweet count.

    Only documents whose ``numTweetsBetweenRelative(None, <7 days>)`` is at
    least 5 are considered. For each of them the ratio
    ``numTweets() / numTweetsBetweenRelative(None, <7 days>)`` is computed,
    binned through ``pieData`` and drawn with matplotlib.
    """
    # Seven days expressed in seconds.
    # NOTE(review): presumably an offset relative to publication - confirm
    # against SimpleDoc.numTweetsBetweenRelative.
    oneWeek = 60 * 60 * 24 * 7

    # First select the documents, then compute the ratios (same order of
    # calls as a filter followed by a map).
    docsWithEnoughTweets = [
        doc for doc in SimpleDoc.getall()
        if doc.numTweetsBetweenRelative(None, oneWeek) >= 5
    ]
    relativeTweetDiffAfter1WeekAndTotal = [
        float(doc.numTweets()) / doc.numTweetsBetweenRelative(None, oneWeek)
        for doc in docsWithEnoughTweets
    ]

    binConditions = [
        [lambda x: x == 1.0, "+0%"],
        [lambda x: 1.0 < x <= 1.1, "+0-10%"],
        [lambda x: 1.1 < x <= 1.2, "+10-20%"],
        [lambda x: 1.2 < x <= 1.3, "+20-30%"],
        [lambda x: 1.3 < x <= 1.4, "+30-40%"],
        [lambda x: 1.4 < x <= 1.5, "+40-50%"],
        [lambda x: x > 1.5, ">50%"],
    ]
    relBins, labels = pieData(binConditions, relativeTweetDiffAfter1WeekAndTotal)

    plt.figure()
    plt.pie(relBins, labels=labels, startangle=90, autopct='%1.1f%%')

    plt.title("Relative Anzahl Tweets im Vergleich zu Anzahl Tweets nach einer Woche")
    plt.show()
def removeDoubleUsersRelative():
    """Show a pie chart of the relative tweet loss per document per duplicate user.

    For every document with at least one tweet, the fraction
    (tweets - distinct ``tweet.user`` values) / tweets is computed. The
    fractions are handed to ``pieData`` together with the percentage bins
    below, and the result is drawn with matplotlib.
    """
    diffs = []

    for doc in SimpleDoc.getall():
        # A real list (not a lazy map object) so that len() works on it.
        tweetUsers = [tweet.user for tweet in doc.tweets]
        if len(tweetUsers) >= 1:
            numTweets = len(tweetUsers)
            numUsers = len(set(tweetUsers))
            # float() keeps the division from truncating on integer operands.
            diffs.append(float(numTweets - numUsers) / numTweets)

    relBins, labels = pieData([
        [lambda x: x == 0.0, "0%"],
        # Upper bound is 0.1 (10%); the values are fractions, so a bound of
        # 10.0 would swallow every non-zero value and overlap the next bins.
        [lambda x: x > 0.0 and x <= 0.1, "0%<d<=10%"],
        [lambda x: x > 0.1 and x <= 0.3, "10%<d<=30%"],
        [lambda x: x > 0.3, ">30%"]
    ], diffs)

    plt.figure()
    plt.pie(relBins, autopct='%1.1f%%',
        startangle=90, labels=labels)

    plt.title("Differenzen in Anzahl Tweets zu Dokument, wenn doppelte Benutzer entfernt werden.")
    plt.show()
 def getBins(beta, binConditions):
     """Bin 10000 integer-truncated exponential samples and return the bins.

     ``beta`` is passed to ``numpy.random.exponential`` as its first argument
     (the scale); ``binConditions`` is forwarded unchanged to ``pieData``.
     Only the first element of the pair returned by ``pieData`` is returned.
     """
     samples = [int(value) for value in numpy.random.exponential(beta, 10000)]
     bins, _labels = pieData(binConditions, samples)
     return bins
def distFirstTweetToDoc():
    allDocs = list(SimpleDoc.getall())

    for param in range(20, 100):
        diffs = []

        maximumTweetAge = 60*60*24*param
        minimumTweetAge = 60*60*24*10
        for doc in filter(lambda doc: len(doc.tweets) != 0 and doc.age() >= maximumTweetAge, allDocs):
            pubTimestamp = doc.publicationTimestamp
            # firstTweetTimestamp = max([tweet.timestamp for tweet in doc.tweets])
            diffs.extend([tweet.timestamp-pubTimestamp for tweet in 
                filter(
                    lambda tweet: (tweet.timestamp-doc.publicationTimestamp) < maximumTweetAge and (tweet.timestamp-doc.publicationTimestamp) > minimumTweetAge,
                    doc.tweets
                )
            ])

        maxBins = 30
        timeslot = (float(maximumTweetAge)-float(minimumTweetAge))/maxBins

        def binNr2Bound(binNr):
            return minimumTweetAge+(binNr*timeslot)

        binConditions = map(
            lambda binNr: [lambda x: x>binNr2Bound(binNr) and x<=binNr2Bound(binNr+1), str(binNr) + "X"],
            range(0, maxBins)
        )

        # binConditions.append([lambda x: x>binNr2Bound(maxBins), ">" + str(maxBins-1) + str("X")])

        diffBins, diffLabels = pieData(binConditions, diffs)
        
        distBinConditions = map(
            lambda binNr: [lambda x: x==binNr, "X=" + str(binNr)],
            range(0, maxBins)
        )

        def getBins(beta, binConditions):
            s = map(lambda x: int(x), numpy.random.exponential(beta, 10000))
            bins, labels = pieData(binConditions, s)
            return bins

        def binDiffs(bins1, bins2):
            return sum(map((lambda (a, b): abs(a-b)), zip(bins1, bins2)))

        def searchInRangeRec(minBeta, maxBeta, steps, depth, maxDepth):
            minError = min(
                map(lambda beta: [beta, binDiffs(getBins(beta, distBinConditions), diffBins)], numpy.arange(minBeta, maxBeta, steps)),
                key = lambda x: x[1]
            )

            errorBelow = binDiffs(getBins(minError[0]-(float(steps)/2), distBinConditions), diffBins)
            errorAbove = binDiffs(getBins(minError[0]+(float(steps)/2), distBinConditions), diffBins)

            if depth==maxDepth:
                return minError
            elif errorBelow <= errorAbove:
                x = searchInRangeRec(minError[0]-steps, minError[0], float(steps)/10, depth+1, maxDepth)
                return x[0], x[1]
            else:
                x = searchInRangeRec(minError[0], minError[0]+steps, float(steps)/10, depth+1, maxDepth)
                return x[0], x[1]

        beta, error = searchInRangeRec(1, 10, 1, 0, 3)

        print param, (error/maxBins)
    # s = numpy.random.poisson(1.2, 10000)
    # s = numpy.random.zipf(1.5, 10000)
    # f = 3.0
    # s = map(lambda x: (float(x)-(1+(random.random()/f)))*f, s)
    # s.extend([0] * (100*60))

    
    #binConditions2.append([lambda x: x>maxBins, ">" + str(maxBins-1) + str("X")])


    """expDistData = map(lambda x: int(x), numpy.random.exponential(beta, 10000))