def getTweetFromRangeQuery(self):
    """Run a range query for this cluster and store the results on it.

    Calls ``TweetInterface.rangeQuery`` with the cluster's ``'region'`` and
    ``'period'`` entries, then replaces ``self._tweet_cluster['tweets']``
    with a list holding every item the query yields, in iteration order.
    """
    cluster = self._tweet_cluster
    fetched = TweetInterface().rangeQuery(cluster['region'], cluster['period'])
    # Reset the stored list before draining the query result so the entry
    # exists (possibly partially filled) even if iteration stops early.
    cluster['tweets'] = []
    cluster['tweets'].extend(fetched)
def getHistoricTweets(self, days=7):
    """Collect tweets for day-shifted copies of the cluster's period.

    For every d in 1..days both bounds of ``self._tweet_cluster['period']``
    are converted to int, shifted by ``d * 24 * 3600`` and passed (as
    strings) to ``TweetInterface.rangeQuery`` together with the cluster's
    region.

    Args:
        days: Number of shifted windows to query. Defaults to 7.

    Returns:
        A flat list of every tweet yielded by the per-day queries, in
        query order.
    """
    # NOTE(review): the shift is *added* to the period, so the queried
    # windows lie after the cluster's own period even though the method is
    # named "historic" -- confirm the sign is intended.
    day_length = 24 * 3600  # presumably seconds; period bounds look like Unix timestamps
    interface = TweetInterface()
    collected = []
    for day_index in xrange(1, days + 1):
        shift = day_length * day_index
        window_end = int(self._tweet_cluster['period'][1]) + shift
        window_begin = int(self._tweet_cluster['period'][0]) + shift
        collected.extend(
            interface.rangeQuery(self._tweet_cluster['region'],
                                 [str(window_begin), str(window_end)]))
    return collected
def getHistoricTweets(self, days=7):
    """Gather tweets from the cluster's region over shifted time windows.

    The cluster's period is moved by whole multiples of ``24 * 3600``
    (one multiple per d in 1..days) and each moved window is queried
    through ``TweetInterface.rangeQuery``; the window bounds are sent as
    strings.

    Args:
        days: How many shifted windows to query. Defaults to 7.

    Returns:
        One list containing the tweets of all windows, in query order.
    """
    # NOTE(review): a definition with the same name and logic sits right
    # next to this one in the file; if both share a scope, only the later
    # one is effective -- confirm whether one can be dropped.
    # NOTE(review): offsets are added, i.e. windows move forward in time,
    # which looks at odds with "historic" -- verify against callers.
    one_day = 24 * 3600
    ti = TweetInterface()
    gathered = []
    for d in xrange(1, days + 1):
        offset = one_day * d
        upper = int(self._tweet_cluster['period'][1]) + offset
        lower = int(self._tweet_cluster['period'][0]) + offset
        hits = ti.rangeQuery(self._tweet_cluster['region'],
                             [str(lower), str(upper)])
        gathered += list(hits)
    return gathered
def testWithExtendedTweets(): ti = TweetInterface(collection=TwitterConfig.extended_tweet_collection) tweets = {} most_popular_tweet = '' max_retweet_count = -1 for tweet in ti.getAllFields(fields='text'): text = tweet['text'] count = tweets.get(text, 0) count += 1 tweets[text] = count if count > max_retweet_count: max_retweet_count = count most_popular_tweet = text print most_popular_tweet, max_retweet_count print len(tweets)
def build_tweet_list(self):
    """Populate ``self.tweets`` and ``self.parsed_tweets``.

    ``self.tweets`` always starts with ``self.tweet``; ``self.retweet`` and
    ``self.quoted_tweet`` follow, in that order, when they are truthy.
    ``self.parsed_tweets`` holds one ``TweetInterface`` wrapper per entry
    of ``self.tweets``, in the same order.
    """
    self.tweets = [self.tweet]
    optional_parts = (self.retweet, self.quoted_tweet)
    self.tweets.extend(part for part in optional_parts if part)
    self.parsed_tweets = [TweetInterface(entry) for entry in self.tweets]
# main(): walk 3000 consecutive windows of 24*3600 each, going backwards from
# the fixed timestamp 1381536000. For every window it calls
# TweetInterface.rangeQuery(period=[begin_time, end_time], fields='_id') and
# takes .count() of the result, keeping the largest count seen in `max` and
# printing it.
# - `counts` collects every per-window count and `x` collects the label
#   str((12-day)%30); neither list is read again inside this function.
# - The local name `max` shadows the builtin max() for the rest of the body.
# NOTE(review): the line structure of this function was lost, so the nesting
# of the trailing `print max` (inside the inner `if`, inside `if c > 0`, per
# loop iteration, or once after the loop) cannot be determined from here --
# restore the indentation from version control before refactoring.
def main(): ti = TweetInterface() counts = [] x = [] now = 1381536000 interval = 24*3600 max = 0 for day in xrange(3000): begin_time = now - (day + 1) * interval end_time = now - day * interval c = ti.rangeQuery(period=[begin_time, end_time], fields='_id').count() counts.append(c) x.append(str((12-day)%30)) if c > 0: if c > max: max = c print max
def testWithTweet():
    """Build features for one stored event using tweets instead of photos.

    Steps: build all corpora for ``element_type="tweets"`` and keep the
    first one the dict yields; load one document from the
    ``candidate_event_25by25_merged`` collection of the ``citybeat`` DB
    and print it; fetch up to 30 tweet documents; drop the event's
    ``"photos"`` entry and store the tweets under ``"tweets"``; wrap the
    event in ``BaseFeature`` and print the result of ``printFeatures()``.
    """
    from corpus import buildAllCorpus

    corpora = buildAllCorpus(element_type="tweets", debug=True)
    # Take whichever corpus comes first; the key itself is not needed.
    # (If the dict is empty, `corpus` stays unbound and the BaseFeature
    # call below raises NameError.)
    for _unused_key, corpus in corpora.items():
        break

    events = EventInterface()
    events.setDB("citybeat")
    events.setCollection("candidate_event_25by25_merged")
    event = events.getDocument()
    print(event)

    tweet_source = TweetInterface()
    tweets = list(tweet_source.getAllDocuments(limit=30))

    del event["photos"]
    event["tweets"] = tweets
    feature = BaseFeature(event, corpus=corpus)
    print(feature.printFeatures())
def main():
    """Create a TwitterCluster and give it a fixed region and period.

    The region is a latitude/longitude bounding box and the period is a
    pair of timestamp strings; both are hard-coded below.
    """
    # Constructed exactly as before; the instance is not used further in
    # this function.
    _interface = TweetInterface()

    bounding_box = dict(
        min_lat=40.73297324,
        max_lat=40.73827852,
        min_lng=-73.99410076,
        max_lng=-73.98609447999999,
    )
    time_window = ['1354910879', '1354918838']

    cluster = TwitterCluster()
    cluster.setRegion(bounding_box)
    cluster.setPeriod(time_window)
def checkTweetInRegion():
    """Count how many stored tweets fall inside the configured region.

    The region bounds come from the ``photo_min_lat`` / ``photo_max_lat`` /
    ``photo_min_lng`` / ``photo_max_lng`` attributes of ``InstagramConfig``.
    Every document of the ``tweets`` collection in ``citybeat_production``
    is tested with ``Region.insideRegion`` using its
    ``[latitude, longitude]`` pair. Prints the number of tweets inside the
    region, then the total number of tweets examined.
    """
    bounds = {
        'min_lat': InstagramConfig.photo_min_lat,
        'max_lat': InstagramConfig.photo_max_lat,
        'min_lng': InstagramConfig.photo_min_lng,
        'max_lng': InstagramConfig.photo_max_lng,
    }
    area = Region(bounds)

    interface = TweetInterface()
    interface.setDB('citybeat_production')
    interface.setCollection('tweets')

    total = 0
    inside = 0
    for doc in interface.getAllDocuments():
        place = doc['location']
        point = [place['latitude'], place['longitude']]
        total += 1
        if area.insideRegion(point):
            inside += 1

    print(inside)
    print(total)
def main():
    """Print every recent tweet whose text mentions 'quinn'.

    Queries the ``tweets`` collection of ``citybeat_production`` for
    documents whose ``'created_time'`` is ``$gte`` the string
    ``'1378711668'``, prints the size of the result, then prints the
    lower-cased text of each tweet containing ``'quinn'`` and finally the
    number of such tweets.
    """
    source = TweetInterface()
    source.setDB('citybeat_production')
    source.setCollection('tweets')

    recent = source.getAllDocuments({'created_time': {'$gte': '1378711668'}})
    print(recent.count())

    # NOTE(review): the source was flattened onto one line; the counter is
    # assumed to count matching tweets only, with the final print after
    # the loop.
    matches = 0
    for raw in recent:
        lowered = Tweet(raw).getText().lower()
        if 'quinn' not in lowered:
            continue
        print(lowered)
        matches += 1
    print(matches)