def crossrefVsTwitter(yearBounds = [None, None], minTweetAge = None, maxTweetAge = None):
    """Scatter-plot summed tweet-author weights against Crossref counts.

    For every document returned by ``SimpleDoc.getall()`` that was published
    between June and August 2012, the tweets whose age (``tweet.timestamp``
    minus ``doc.publicationTimestamp``) lies inside the requested range are
    selected. The weights of their authors are summed and paired with
    ``doc.numCrossrefs()``. The pairs are drawn as a scatter plot together
    with a linear trend line, and a few statistics are printed.

    Parameters:
        yearBounds: Currently unused. The publication window is hard-coded
            to June-August 2012. The parameter is kept so existing callers
            keep working.
        minTweetAge: Smallest accepted tweet age, or ``None`` (default) for
            no lower bound. Presumably in seconds, like the built-in default
            of ``maxTweetAge`` -- confirm against the timestamp fields.
        maxTweetAge: Largest accepted tweet age, or ``None`` (default) to
            use the built-in limit of ``60*60*24*100`` (100 days if the
            timestamps are in seconds).

    Returns:
        None. Output is printed and shown in a matplotlib window.
    """
    # Earlier revisions overwrote both age arguments unconditionally. The
    # values they used are now only the fallbacks, so a call without
    # arguments behaves exactly as before (no lower bound, 100-day upper
    # bound) while explicit arguments are finally honoured.
    if maxTweetAge is None:
        maxTweetAge = 60*60*24*100

    def publishedInWindow(doc):
        # Look the publication date up once instead of three times.
        published = doc.publicationDatetime()
        return published.year == 2012 and published.month >= 6 and published.month <= 8

    def tweetInAgeRange(tweet, doc):
        age = tweet.timestamp - doc.publicationTimestamp
        # "is None" rather than truthiness: a bound of 0 is a real bound.
        return (minTweetAge is None or age >= minTweetAge) and age <= maxTweetAge

    def userWeight(tweet):
        # Weight of the tweet's author, or None when the user is unknown.
        user = tweet.user()
        return None if user is None else user.weight()

    tweetVsCrossrefList = []
    totalDocs = 0
    totalTweets = 0
    nullWeights = 0
    nonNullWeights = 0

    for doc in SimpleDoc.getall():
        if not publishedInWindow(doc):
            continue

        # Real lists (not map/filter results) so len() also works on Python 3.
        docsTweets = [tweet for tweet in doc.tweets if tweetInAgeRange(tweet, doc)]
        userWeights = [userWeight(tweet) for tweet in docsTweets]
        knownWeights = [weight for weight in userWeights if weight is not None]

        nullWeights += len(userWeights) - len(knownWeights)
        nonNullWeights += len(knownWeights)

        # sum([]) is 0, so documents without (weighted) tweets contribute 0.
        tweetVsCrossrefList.append([doc.numCrossrefs(), sum(knownWeights)])

        totalDocs += 1
        totalTweets += len(docsTweets)

    print(totalDocs)
    print(totalTweets)

    # Share of tweets whose author weight is unknown; undefined (nan) when
    # no tweet matched at all, instead of raising ZeroDivisionError.
    weightCount = nullWeights + nonNullWeights
    print(float(nullWeights) / weightCount if weightCount else float('nan'))

    if not tweetVsCrossrefList:
        # Nothing to unpack or fit: zip(*[]) would raise on unpacking.
        return

    x, y = zip(*tweetVsCrossrefList)

    paperFigure(plt)
    plt.scatter(x, y)
    # NOTE(review): the axis is labelled "#Tweets" although y holds the sum
    # of user weights -- label kept unchanged, confirm the intended wording.
    plt.ylabel("#Tweets")
    plt.xlabel("#Crossrefs")

    # Linear least-squares trend line across the observed Crossref range.
    # range() requires numCrossrefs() to return integers.
    p = numpy.polyfit(x, y, 1)
    xTrend = range(min(x), max(x)+1)
    yTrend = [numpy.polyval(p, xValue) for xValue in xTrend]
    plt.plot(xTrend, yTrend, color='r')

    print('korrelationskoeffizient: ' + str(korrelationskoeffizient(x, y)))

    plt.tight_layout()
    plt.show()
from main.util.common import Sentiment
from main.util.plotting import paperFigure
import matplotlib.pyplot as plt

# Histogram of tweet age relative to the publication of the referenced
# document, with one series per sentiment class.

sentiments = Sentiment.fromFile("sentiments_all.json")

# Classification codes, in the same order as the legend labels passed to
# plt.hist below ("1" -> "negative", ..., "4" -> "t+l").
classes = ["1", "2", "3", "4"]

# x[i] collects the tweet ages for classes[i].
x = []
for classif in classes:
    xn = []
    for sentiment in sentiments:
        if sentiment.classification == classif:
            doc = sentiment.doc()
            tweetTimestamp = int(sentiment.id_tweetTimestamp)
            # Single-argument print(...) gives the same output on Python 2
            # and is valid syntax on Python 3.
            print(tweetTimestamp)
            # Age of the tweet divided by 60*60*24 (days, if the timestamps
            # are in seconds -- TODO confirm). With integer timestamps this
            # is floor division on Python 2 and true division on Python 3.
            xn.append((tweetTimestamp-doc.publicationTimestamp)/(60*60*24))
    x.append(xn)

paperFigure(plt)
# NOTE(review): newer matplotlib releases replaced the `normed` keyword of
# hist() with `density`; kept as-is to match the matplotlib version this
# script was written against.
plt.hist(x, label=["negative", "neutral", "positive", "t+l"], bins = [ -10, 0, 5, 10, 30, 100, 300 ], normed=True)
plt.legend()
plt.show()